Example #1
 def handle(self, *args, **options):
     # create a connection
     mturk = MTurkConnection(
         getattr(settings, 'MTURK_AWS_KEY', settings.MEDIASYNC['AWS_KEY']),
         getattr(settings, 'MTURK_AWS_SECRET', settings.MEDIASYNC['AWS_SECRET']),
         host = 'mechanicalturk.sandbox.amazonaws.com' if options['sandbox'] else 'mechanicalturk.amazonaws.com'
     )
     
     # if --delete, delete all the old ones first.
     if options['delete_first']:
         for hit in mturk.get_all_hits():
             mturk.disable_hit(hit.HITId)
     
     if options['exclude']:
         exclude_reader = csv.DictReader(open(options['exclude'], 'r'))
         exclude = set()
         for row in exclude_reader:
             exclude.add(row['td_id'])
     
     # iterate over items and create them one by one
     cursor = connection.cursor()
     cursor.execute(
         """
         select entity_id, type
         from matchbox_wikipediainfo, matchbox_entity
         where entity_id not in (select entity_id from matchbox_sunlightinfo where bio is not null)
           and bio != '' and bio is not null
           and entity_id = matchbox_entity.id %s
         order by entity_id limit %s;
         """ % ("and type = '%s'" % options['type'] if options['type'] else '', '%s'), # hack: re-insert the %s placeholder so the database driver can still bind options['count']
     [options['count']])
     
     for row in cursor:
         if options['exclude']:
             if str(row[0]).replace('-', '') in exclude:
                 continue
         
         if options['practice']:
             print row[0]
             continue
         
         try:
             hit = mturk.create_hit(
                 question = FakeQuestionForm(get_hit_xml(row[0])),
                 max_assignments = 3,
                 annotation = row[0],
                 
                 title = "Wikipedia match validation",
                 description = "We have matched a set of entities in a database to descriptions pulled from Wikipedia via an automated process. Confirm that the match is correct.",
                 reward = 0.06,
                 duration = datetime.timedelta(minutes=30),
                 lifetime = datetime.timedelta(days=7),
                 keywords = ['wikipedia', 'matching'],
                 approval_delay = datetime.timedelta(days=3),
                 qualifications = Qualifications([PercentAssignmentsApprovedRequirement("GreaterThan", 90)])
             )
             print hit[0].HITId
         except Exception as e:
             sys.stderr.write("Failed to create hit %s\n" % row[0])
             sys.stderr.write(getattr(e, 'body', ''))
             sys.stderr.write('\n')
         except:
             pass
Example #2
	def getAllHits(self, hits):
		mtc = MTurkConnection(aws_access_key_id=self.ACCESS_ID,
		                      aws_secret_access_key=self.SECRET_KEY,
		                      host=self.HOST)
		for hit in hits:
			assignments = mtc.get_assignments(hit)
			for assignment in assignments:
				print "Answers of the worker %s" % assignment.WorkerId
				for question_form_answer in assignment.answers[0]:
					for key, value in question_form_answer.fields:
						print "%s: %s" % (key, value)
				mtc.approve_assignment(assignment.AssignmentId)
				print "--------------------"
			mtc.disable_hit(hit)
Example #3
def cancel_hit(hit):
    hostURL = SANDBOX_HOST if hit.sandbox else HOST

    connection = MTurkConnection(
        aws_access_key_id=hit.aws_access_key, aws_secret_access_key=hit.aws_secret_key, host=hostURL
    )

    return connection.disable_hit(hit.mturkid)
Example #4
def cancel_hit(hit):
    hostURL = SANDBOX_HOST if hit.sandbox else HOST

    connection = MTurkConnection(aws_access_key_id=hit.aws_access_key,
                                 aws_secret_access_key=hit.aws_secret_key,
                                 host=hostURL)

    return connection.disable_hit(hit.mturkid)
Example #5
    def delete_hits(self, hits_to_delete):
        print "Connecting to Turk host at"
        print app.config['MTURK_HOST']
        sys.stdout.flush()

        mturk = MTurkConnection(app.config['AWS_ACCESS_KEY_ID'],
                                app.config['AWS_SECRET_ACCESS_KEY'],
                                host=app.config['MTURK_HOST'])

        print "Deleting extra hits"

        for hit in hits_to_delete:
            try:
                mturk.disable_hit(hit)
            except MTurkRequestError:
                print "Trying to delete hit that doesn't exist"

        return True
Example #6
def get_final_score(HITId):

    mtc = MTurkConnection(aws_access_key_id=ACCESS_ID,
                      aws_secret_access_key=SECRET_KEY,
                      host=HOST)

    hits = mtc.get_all_hits()
    hits_dict = dict()

    for hit in hits:
        hits_dict[hit.HITId] = hit

    curr_hit = hits_dict[HITId]

    sum_opin = 0
    sum_acc = 0
    index = 0
    assignments = mtc.get_assignments(curr_hit.HITId)
    for assignment in assignments:
        #print "Answers of the worker %s" % assignment.WorkerId
        for question_form_answer in assignment.answers[0]:
            for key in question_form_answer.fields:
                if question_form_answer.qid == 'design':
                    #print "%s" % (key)
                    index=index+1
                    sum_opin+=int(key)
                else:
                    sum_acc += answer_key(key)
        mtc.approve_assignment(assignment.AssignmentId)
        #print "--------------------"
    mtc.disable_hit(curr_hit.HITId)

    #print "Average Score %s" % (sum_opin/index)
    #print "Legible Accuracy: %s%%" % (sum_acc/index)

    avg_ratings = float(sum_opin) / float(index)
    avg_ratings_score = avg_ratings * 25
    avg_legib_score = float(sum_acc) / float(index)

    # Calculate weighted average:
    # 70% for the legibility (compare/match) score,
    # 30% for the ratings
    weighted_avg = 0.70*avg_legib_score + 0.30*avg_ratings
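    # Hypothetical worked example (assumed values): if avg_legib_score is 80.0 and
    # avg_ratings is 3.0, then weighted_avg = 0.70*80.0 + 0.30*3.0 = 56.9.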
    return weighted_avg
Example #7
class MTurkClient:

    # SETUP
    # ===========
           
    def __init__(self,aws_access_key,aws_secret_key,aws_mode):
        self.mode = aws_mode
        if aws_mode == 'sandbox':
            self.host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            self.host = 'mechanicalturk.amazonaws.com'

        self.c = MTurkConnection(
            aws_access_key,
            aws_secret_key,
            host=self.host)
            
    default_settings = {
        'lifetime': DAY,
        'duration': 10 * MINUTE,
        'approval_delay': DAY,

        'title': "[title]",
        'description': "[description]",
        'keywords': [],

        'reward': 0.01,
        'max_assignments': 1,
        
        'height': 700,
        
        'qualifications': [],
    }
            
    # HITS
    # ===========
    def create_hit(self,url,extra_settings):
        "Eventually, this should take a TEMPLATE and a dictionary of INPUT data that's put into that template. This function would then create an HTML file locally (assuming we're running on a web server) by replacing template {tags} with input values, and then send the URL to the newly created page to MTurk."
       
        settings = self.default_settings.copy()
        settings.update(extra_settings)

        settings['reward'] = Price(settings['reward'])
        settings['qualifications'] = qualification.Qualifications(settings['qualifications'])
        settings['keywords'] = ','.join(settings['keywords'])
        height = settings.pop('height')

        hit = self.c.create_hit(question=ExternalQuestion(url,height),**settings)[0]
        #print 'Created hit %s' % hit.HITId
        return hit.HITId,hit.HITTypeId
        
        #hit_type=None, # Let Amazon do this automatically
        #annotation=None, # Optional annotation for our system to use
        #questions=None, # If you want to create multiple HITs at a time? Probably irrelevant for External
        #response_groups=None, # Unclear what this does 
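
    # A minimal sketch (not part of the original example) of the templating idea described in
    # the create_hit docstring above: substitute {tag} values into a local HTML template, write
    # the rendered page where the web server can serve it, and pass its URL to create_hit.
    # template_path, output_path, public_url and inputs are hypothetical names.
    def create_hit_from_template(self, template_path, output_path, public_url, inputs, extra_settings):
        with open(template_path) as f:
            html = f.read().format(**inputs)  # replace template {tags} with input values
        with open(output_path, 'w') as f:
            f.write(html)
        return self.create_hit(public_url, extra_settings)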
        
    def get_hit(self,hit_id):
        return self.c.get_hit(hit_id)[0]
        
    def hit_results(self,hit_id,type=None): # type in ['Submitted','Approved','Rejected',None]
        results = {}
    
        assignments = self.c.get_assignments(hit_id, status=type, page_size=100)
        for asst in assignments:
            results.setdefault(asst.AssignmentId,{})
            answers = asst.answers[0]
            for qfa in answers:
                field, response = qfa.qid, qfa.fields[0]
                results[asst.AssignmentId][field] = response
                
            results[asst.AssignmentId]['worker_id'] = asst.WorkerId
                       
            results[asst.AssignmentId]['accept_time'] = datetime.strptime(asst.AcceptTime,"%Y-%m-%dT%H:%M:%SZ")
            results[asst.AssignmentId]['submit_time'] = datetime.strptime(asst.SubmitTime,"%Y-%m-%dT%H:%M:%SZ")
                
        return results
        
    # URL of a HIT on MTurk
    def hit_url_turk(self,hit_id):
        pass
        
    def hit_url_external(self,hit_id):
        pass
        
    def extend_hit(self,hit_id,extras):
        return self.c.extend_hit(hit_id, extras)
        
    @catcherror
    def delete_hit(self,hit_id):
        self.c.disable_hit(hit_id)
        
    # Deletes all the HITS on the server. Risky!
    def cleanup(self):
        for hit in self.c.get_all_hits():
            self.delete_hit(hit.HITId)
            
    # ASSIGNMENTS
    # ===========
    @catcherror
    def approve(self, asst_id, feedback=None):
        return self.c.approve_assignment(asst_id, feedback)
        
    @catcherror
    def reject(self, asst_id, feedback=None):
        return self.c.reject_assignment(asst_id, feedback)

    def block(self,worker_id,feedback=None):
        return self.c.block_worker(worker_id, feedback)
        
    def unblock(self,worker_id,feedback=None):
        return self.c.unblock_worker(worker_id, feedback)
        
    def bonus(self,asst,amount,feedback):
        return self.c.grant_bonus(asst.worker, asst.asst_id, Price(amount), feedback)
        
    # STATUS / DIAGNOSTICS
    # --------------------
    def balance(self):
        return self.c.get_account_balance()[0]
Example #8
rev_hits = waitUntilHIT1Complete(mtc,hitIds)

possibleAns = defaultdict(Set)

for hit in rev_hits:
	if hit.HITId in hitIds:
		assignments = mtc.get_assignments(hit.HITId)
		for assignment in assignments:
			#print("Answers of the worker %s" % assignment.WorkerId)
			for question_form_answer in assignment.answers[0]:
				for value in question_form_answer.fields:
					#print("%s: %s" % (hitsDic[hit.HITId],value))
					possibleAns[hitsDic[hit.HITId]].add(value)
			#print("--------------------")
			mtc.approve_assignment(assignment.AssignmentId)
		mtc.disable_hit(hit.HITId)

print('Creating the second stage HITS')

hitIds = Set()
answersDic = {}

for key, val in possibleAns.iteritems():
	sentence, context = key
	hitId, answers = createHIT2(val,sentence,context)
	hitIds.add(hitId)
	hitsDic[hitId] = (sentence, context)
	answersDic[sentence] = answers

rev_hits = waitUntilHIT1Complete(mtc,hitIds)
Example #9
def main(argv):
	if (len(argv) < 2):
		print "Usage: tweetbeats.py <song_title> <instrument_number> <optional_topic>"
	else:
		user_topic = ""
		# check for command line argument
		if len(argv) > 2:
			user_topic = argv[2]

		'''
		 '  Gather Tweets
		'''
		print "Gathering Tweets..."
		tc = TweetCollector()
		results = tc.CollectTweets(user_topic)
		print "Topic: " + results[0]
		'''
		 '  Create Hits
		'''
		print "Creating HITs..."
		mtur = MTurk(ACCESS_ID, SECRET_KEY,HOST)
		for result in results[1]:
			res = filter(lambda x: x in string.printable, result)
			new_id = mtur.createHit(res)

		mtc = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)

		hits = get_all_reviewable_hits(mtc)
		while (len(hits) < MIN_TWEETS):
			print "Not enough hits. Will try again in 10 seconds...."
			sleep(10)
			hits = get_all_reviewable_hits(mtc)

		hits3 = []
		for hit in hits:
			assignments = mtc.get_assignments(hit.HITId)
			for assignment in assignments:
				print "Answers of the worker %s" % assignment.WorkerId
				answers = []
				for question_form_answer in assignment.answers[0]:
					for value in question_form_answer.fields:
						answers.append(int(value))
				print "Responses : ", answers
				hits3.append(answers)
				mtc.approve_assignment(assignment.AssignmentId)
				print "--------------------"
			mtc.disable_hit(hit.HITId)

		#Remove unused HITS; make 5 passes to clean up as best we can
		for pass_num in range(1, 6):
			print "Removing unused HITs... Pass #%d of 5" % pass_num
			if pass_num > 1:
				sleep(20)
			hits = mtc.get_all_hits()
			for hit in hits:
				mtc.disable_hit(hit.HITId)

		'''
		 '  Make Hits into Music
		'''
		initializeTrack(argv[1])
		time = 1
		for result in hits3:

			duration = 0
			durationResult = result[1]
			if durationResult == 1:
				duration = .375 		#dotted sixteenth
			elif durationResult == 2:
				duration = .5 	 		#eighth
			elif durationResult == 3:
				duration = .75 			#dotted eighth
			elif durationResult == 4:
				duration = 1 			#quarter
			elif durationResult == 5:
				duration = 1.5 			#dotted quarter
			elif durationResult == 6:
				duration = 2 			#half
			elif durationResult == 7:
				duration = 3 			#dotted half
			elif durationResult == 8:
				duration = 4 			#whole

			shift = random.choice([-11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

			chord = result[0]
			if chord == 1:
				addChord(time, duration, 100, 60 + shift, 64 + shift, 67 + shift, -1) #C maj 	Joy
			elif chord == 2:
				addChord(time, duration, 100, 60 + shift, 63 + shift, 67 + shift, 70 + shift) #C min9	Sadness
			elif chord == 3:
				addChord(time, duration, 100, 60 + shift, 64 + shift, 66 + shift, 69 + shift) #C dim7	Anger
			elif chord == 4:
				addChord(time, duration, 100, 60 + shift, 64 + shift, 66 + shift, -1) #C flat5	Fear
			elif chord == 5:
				addChord(time, duration, 100, 60 + shift, 64 + shift, 67 + shift, 69 + shift) #C maj6	Trust
			elif chord == 6:
				addChord(time, duration, 100, 60 + shift, 63 + shift, 67 + shift, 69 + shift) #C m6 	Distrust
			elif chord == 7:
				addChord(time, duration, 100, 60 + shift, 63 + shift, 66 + shift, 70 + shift) #C m7b5	Surprise
			elif chord == 8:
				addChord(time, duration, 100, 60 + shift, 64 + shift, 67 + shift, 71 + shift) #C maj7	Anticipation

			time += duration
		addChord(time, 4, 000, 60, 60, 60, 60) #silence to allow last note to fade out
		closeTrack(argv[0])

		music_file = argv[0] + ".mid" 
		# set up the mixer 
		freq = 44100 # audio CD quality 
		bitsize = -16 # unsigned 16 bit 
		channels = 2 # 1 is mono, 2 is stereo 
		buffer = 2048 # number of samples 
		pygame.mixer.init(freq, bitsize, channels, buffer) 
		# optional volume 0 to 1.0 
		pygame.mixer.music.set_volume(1.0) 
		
		pygame.mixer.music.load(music_file) 
		print "Music file %s loaded!" % music_file 
		clock = pygame.time.Clock() 
		pygame.mixer.music.play() 
		while pygame.mixer.music.get_busy(): 
			# check if playback has finished 
			clock.tick(30) 
Example #10
	def disableHit(self, hit_id):
		mtc = MTurkConnection(aws_access_key_id=self.ACCESS_ID,
		                      aws_secret_access_key=self.SECRET_KEY,
		                      host=self.HOST)
		mtc.disable_hit(hit_id, response_groups=None)
Example #11
class MTurkProvider(object):
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    keywords = ['daemo']
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST)
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in [
                    'location', 'approval_rate', 'total_tasks'
            ]:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [
                    val.strip() for val in item.expression['value']
                    if val is not None and val != ''
                ])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(
                    op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(
                    op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(
                project.qualification)
        boomerang_qual, success = self.create_qualification_type(
            owner_id=project.owner_id,
            project_id=project.group_id,
            name='Boomerang Score #{}'.format(project.group_id),
            flag=FLAG_Q_BOOMERANG,
            description='No description available')
        boomerang = None
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:

                    boomerang_blacklist, success = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(project.group_id, len(
                                                           WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if success and add_boomerang:
                        boomerang = BoomerangRequirement(
                            qualification_type_id=boomerang_blacklist.type_id,
                            comparator=OP_DNE,
                            integer_value=None)
                        requirements.append(boomerang)

        else:
            boomerang = BoomerangRequirement(
                qualification_type_id=boomerang_qual.type_id,
                comparator=OP_GTEQ,
                integer_value=boomerang_threshold)
            if success and add_boomerang:
                requirements.append(boomerang)
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        # if project.min_rating > 0:
        #     return 'NOOP'
        if not tasks:
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(
                query, {
                    'skipped': TaskWorker.STATUS_SKIPPED,
                    'rejected': TaskWorker.STATUS_REJECTED,
                    'expired': TaskWorker.STATUS_EXPIRED,
                    'project_id': project.id
                })
            tasks = cursor.fetchall()

        rated_workers = Rating.objects.filter(
            origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        duration = project.timeout if project.timeout is not None else datetime.timedelta(
            hours=24)
        lifetime = project.deadline - timezone.now(
        ) if project.deadline is not None else datetime.timedelta(days=7)

        for task in tasks:
            question = self.create_external_question(task[0])
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            qualifications, boomerang_qual = self.get_qualifications(
                project=project,
                boomerang_threshold=int(round(task[4], 2) * 100),
                add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(
                title=project.name,
                description=self.description,
                price=project.price,
                duration=duration,
                keywords=self.keywords,
                approval_delay=datetime.timedelta(days=2),
                qual_req=qualifications,
                qualifications_mask=qualifications_mask,
                boomerang_threshold=int(round(task[4], 2) * 100),
                owner_id=project.owner_id,
                boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                try:
                    hit = self.connection.create_hit(
                        hit_type=hit_type.string_id,
                        max_assignments=task[3],
                        lifetime=lifetime,
                        question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId,
                                         hit_type=hit_type,
                                         task_id=task[0])
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        message = {
                            "type": "ERROR",
                            "detail":
                            "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        redis_publisher = RedisPublisher(facility='bot',
                                                         users=[project.owner])
                        message = RedisMessage(json.dumps(message))
                        redis_publisher.publish_message(message)
                    return 'FAILED'
            else:
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(
                        hit_id=mturk_hit.hit_id,
                        hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self,
                        owner_id,
                        title,
                        description,
                        price,
                        duration,
                        boomerang_threshold,
                        keywords=None,
                        approval_delay=None,
                        qual_req=None,
                        qualifications_mask=0,
                        boomerang_qual=None):
        hit_type = MTurkHITType.objects.filter(
            owner_id=owner_id,
            name=title,
            description=description,
            price=Decimal(str(price)),
            duration=duration,
            qualifications_mask=qualifications_mask,
            boomerang_threshold=boomerang_threshold).first()
        if hit_type is not None:
            return hit_type, True

        reward = Price(price)
        try:
            mturk_ht = self.connection.register_hit_type(
                title=title,
                description=description,
                reward=reward,
                duration=duration,
                keywords=keywords,
                approval_delay=approval_delay,
                qual_req=qual_req)[0]
            hit_type = MTurkHITType(owner_id=owner_id,
                                    name=title,
                                    description=description,
                                    price=Decimal(str(price)),
                                    keywords=keywords,
                                    duration=duration,
                                    qualifications_mask=qualifications_mask,
                                    boomerang_qualification=boomerang_qual,
                                    boomerang_threshold=boomerang_threshold)
            hit_type.string_id = mturk_ht.HITTypeId
            hit_type.save()
        except MTurkRequestError:
            return None, False
        return hit_type, True

    def create_external_question(self, task, frame_height=800):
        task_hash = Hashids(salt=settings.SECRET_KEY,
                            min_length=settings.ID_HASH_MIN_LENGTH)
        task_id = task_hash.encode(task)
        url = self.host + '/mturk/task/?taskId=' + task_id
        question = ExternalQuestion(external_url=url,
                                    frame_height=frame_height)
        return question

    def update_max_assignments(self, task):
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist(
                "This task is not associated to any mturk hit")
        assignments_completed = task.task_workers.filter(~Q(status__in=[
            TaskWorker.STATUS_REJECTED, TaskWorker.STATUS_SKIPPED,
            TaskWorker.STATUS_EXPIRED
        ])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if remaining_assignments > 0 and mturk_hit.num_assignments == mturk_hit.mturk_assignments. \
            filter(status=TaskWorker.STATUS_SUBMITTED).count() and \
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0:
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(
            hit_type=hit_type_id,
            url=self.host + '/api/mturk/notification',
            event_types=[
                'AssignmentReturned', 'AssignmentAbandoned',
                'AssignmentAccepted', 'AssignmentSubmitted'
            ])

    def approve_assignment(self, task_worker):
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if hasattr(task_worker_obj, 'mturk_assignments'
                   ) and task_worker_obj.mturk_assignments.first() is not None:
            try:
                self.connection.approve_assignment(
                    task_worker_obj.mturk_assignments.first().assignment_id)
            except MTurkRequestError:
                return False
        return True

    def reject_assignment(self, task_worker):
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if hasattr(task_worker_obj, 'mturk_assignments'
                   ) and task_worker_obj.mturk_assignments.first() is not None:
            try:
                self.connection.reject_assignment(
                    task_worker_obj.mturk_assignments.first().assignment_id)
            except MTurkRequestError:
                return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.NotAuthorized':
                return None, False
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self,
                                  owner_id,
                                  name,
                                  flag,
                                  description,
                                  project_id,
                                  auto_granted=False,
                                  auto_granted_value=None,
                                  deny=False,
                                  bucket=None):
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        params = {
            'origin_type': Rating.RATING_REQUESTER,
            'origin_id': owner_id,
            'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({
                'upper_bound': bucket[1] * 100,
                'lower_bound': bucket[0] * 100,
                'is_blacklist': True
            })
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        worker_ratings = [{
            "worker_id": r[0],
            "worker_username": r[1],
            "rating": r[2]
        } for r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id,
                                                          flag=flag,
                                                          name=name).first()
        assigned_workers = []
        if qualification is None:
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(
                    owner_id=owner_id,
                    flag=flag,
                    name=name,
                    description=description,
                    auto_granted=auto_granted,
                    auto_granted_value=auto_granted_value,
                    type_id=qualification_type.QualificationTypeId,
                    **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            assigned_workers = MTurkWorkerQualification.objects.values(
                'worker').filter(qualification=qualification).values_list(
                    'worker', flat=True)

        for rating in worker_ratings:
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                if mturk_worker_id not in assigned_workers:
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id,
                        worker_id=mturk_worker_id,
                        value=int(rating['rating'] * 100))
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': int(rating['rating'] * 100)
                }
                MTurkWorkerQualification.objects.update_or_create(
                    qualification=qualification,
                    worker=mturk_worker_id,
                    defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        try:
            result = self.connection.change_hit_type_of_hit(
                hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg,
                                requester_avg):
        """
        Update boomerang for project
        Args:
            project_id:
            worker_id:
            task_avg:
            requester_avg

        Returns:
            str
        """
        hit = MTurkHIT.objects.select_related(
            'hit_type__boomerang_qualification').filter(
                task__project__group_id=project_id).first()
        if hit is not None:
            qualification = hit.hit_type.boomerang_qualification
            worker_qual = MTurkWorkerQualification.objects.filter(
                qualification=qualification, worker=worker_id).first()
            if worker_qual is not None:
                self.update_score(worker_qual,
                                  score=int(task_avg * 100),
                                  override=True)
            else:
                MTurkWorkerQualification.objects.create(
                    qualification=qualification,
                    worker=worker_id,
                    score=int(task_avg * 100),
                    overwritten=True)
                self.assign_qualification(
                    qualification_type_id=qualification.type_id,
                    worker_id=worker_id,
                    value=int(task_avg * 100))

                # other_quals = MTurkWorkerQualification.objects.filter(~Q(qualification=qualification),
                #                                                       worker=worker_id,
                #                                                       overwritten=False)
                # for q in other_quals:
                #     self.update_score(q, score=int(requester_avg * 100))
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(
                worker_qual.qualification.type_id, worker_qual.worker, score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id, value=1):
        """
        Assign a qualification score to a WorkerId
        Args:
            qualification_type_id:
            worker_id:
            value

        Returns:
            bool
        """
        try:
            self.connection.assign_qualification(qualification_type_id,
                                                 worker_id,
                                                 value,
                                                 send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        try:
            self.connection.revoke_qualification(
                qualification_type_id=qualification_type_id,
                subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
Example #12
class HitCreator():
    def __init__(self):
        if settings.IS_DEV_ENV or settings.USE_AMT_SANDBOX:
            HOST = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            HOST = 'mechanicalturk.amazonaws.com'

        self.connection = MTurkConnection(
                aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                host=HOST)

    def createHitFrom(self, audioSnippet, hitType, numIncorrectWords=None):
        if hitType == "fix":
            suffix = "fixHIT"
            # half cent per incorrect word, clamped between 2 and 5 cents
            assert isinstance(numIncorrectWords, int)
            amount = max(min(.05, numIncorrectWords*.005), .02)
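            # Hypothetical worked example (assumed count): numIncorrectWords = 6 gives
            # min(.05, 6*.005) = .03 and max(.03, .02) = .03, i.e. a 3-cent reward.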
        elif hitType == "check":
            suffix = "checkHIT"
            amount = 0.05
        else:
            assert False

        if settings.IS_DEV_ENV:
            baseurl = 'https://localhost:5000/hit/' + suffix
        else:
            baseurl = "https://transcroobie.herokuapp.com/hit/" + suffix
        title = "Transcribe a short audio clip."
        description = "Transcribe the audio. Words may be cut off at the beginning"\
                      " or end of the segment. Do not worry about correctly"\
                      " transcribing these words."
        keywords = ["transcription"]
        frame_height = 800

        thisDocUrl = baseurl + "?docId=" + str(audioSnippet.pk)
        questionform = ExternalQuestion(thisDocUrl, frame_height)

        resultSet = self.connection.create_hit(
            title=title,
            description=description,
            keywords=keywords,
            max_assignments=1,
            question=questionform,
            reward=Price(amount=amount),
            response_groups=('Minimal', 'HITDetail'),  # I don't know what response groups are
        )
        assert len(resultSet) == 1
        audioSnippet.activeHITId = resultSet[0].HITId
        audioSnippet.save()

    def deleteHit(self, hitID):
        try:
            self.connection.disable_hit(hitID)
        except MTurkRequestError as e:
            print "HIT already deleted", e

    def deleteAllHits(self):
        allHits = [hit for hit in self.connection.get_all_hits()]
        for hit in allHits:
            print "Disabling hit ", hit.HITId
            self.deleteHit(hit.HITId)

    def processHit(self, questionFormAnswers):
        # Process each HIT only once. This function will set activeHITId to ""
        # to let you know that the HIT is completed and processed.
        hitType = None
        response = None
        audioSnippet = None
        fixWords = {}
        for questionFormAnswer in questionFormAnswers:
            if questionFormAnswer.qid == "asFileId":
                asFileId = questionFormAnswer.fields[0]
                audioSnippet = get_object_or_404(AudioSnippet, pk = asFileId)
            elif questionFormAnswer.qid == "fixedHITResult":
                hitType = "fix"
                response = None # need to look at word_%d based on audiosnippet
            elif questionFormAnswer.qid.startswith("word_"):
                fixWords[questionFormAnswer.qid] = questionFormAnswer.fields[0]
            elif questionFormAnswer.qid == "checkedHITResult":
                hitType = "check"
                responseStr = questionFormAnswer.fields[0]
                response = [val == 'true' for val in responseStr.split(',')]

        numIncorrectWords = 0
        if hitType == "fix":
            # Get the list of words marked incorrect, and count them
            incorrectWords = audioSnippet.incorrectWords['bools'][-1]
            numIncorrectWords = len(incorrectWords)-sum(incorrectWords)

            # Get the last prediction to interpret incorrectWords
            prediction = audioSnippet.predictions[-1].split()

            # Convert the last prediction to what was actually sent to
            # the user
            predictionSpaced = transcriptWithSpacesAndEllipses(prediction)
            assert len(incorrectWords) == len(predictionSpaced)
            words, isCorrect = combineConsecutiveDuplicates(predictionSpaced,
                    incorrectWords)

            response = ""
            for i in xrange(len(words)):
                if not isCorrect[i]:
                    response += fixWords["word_" + str(i)] + " "
                else:
                    # Only add punctuation (" ") and ellipses if marked incorrect
                    word = words[i]
                    if word.isspace() or word == "":
                        continue
                    elif i == 0 and word.startswith("..."):
                        word = word[3:] # remove initial ellipses
                    elif i == len(words)-1 and word.endswith("..."):
                        word = word[:-3] # remove trailing ellipses
                    response += word.strip() + " "
            audioSnippet.predictions.append(response)

            # Always do a check after a fix
            completionStatus = CompletionStatus.incomplete
        else:
            audioSnippet.incorrectWords['bools'].append(response)
            completionStatus = self.getCompletionStatus(audioSnippet, response)
            if completionStatus == CompletionStatus.correct:
                audioSnippet.hasBeenValidated = True
                audioSnippet.isComplete = True
            elif completionStatus == CompletionStatus.givenup:
                audioSnippet.hasBeenValidated = False
                audioSnippet.isComplete = True
        audioSnippet.activeHITId = ""

        if completionStatus == CompletionStatus.incomplete:
            if hitType == "check":
                # CHECK task complete. Create a FIX task (since not hasBeenValidated)
                self.createHitFrom(audioSnippet, 'fix', numIncorrectWords)
            elif hitType == "fix":
                # FIX task complete. Create a CHECK task.
                self.createHitFrom(audioSnippet, 'check')

        audioSnippet.save()

    def getCompletionStatus(self, audioSnippet, response):
        # only call when all hitTypes == "check"
        # returns a CompletionStatus
        MAX_NUM_PREDICTIONS = 2

        completionStatus = CompletionStatus.incomplete
        if all(response):
            completionStatus = CompletionStatus.correct
        elif len(audioSnippet.predictions) > MAX_NUM_PREDICTIONS:
            completionStatus = CompletionStatus.givenup
        return completionStatus

    def processHits(self, doc):
        """ Returns whether or not the doc had a newly-completed HIT
            which was processed. """
        assert not doc.completeTranscript
        audioSnippets = doc.audioSnippets.order_by('id')

        newHITCompleted = False
        assignments = []
        for audioSnippet in audioSnippets:
            hitID = audioSnippet.activeHITId
            if not hitID: continue

            try:
                hit = self.connection.get_hit(hitID)
            except MTurkRequestError as e:
                logger.error("Perhaps this HIT no longer exists: " + str(e))
                continue

            asgnForHit = self.connection.get_assignments(hit[0].HITId)
            if asgnForHit:
                # Hit is ready. Get the data.
                for asgn in asgnForHit:
                    assignments.append(asgn)
                    questionFormAnswers = asgn.answers[0]
                    self.processHit(questionFormAnswers)
                    newHITCompleted = True

        statuses = [a.isComplete for a in audioSnippets]
        if all([a.hasBeenValidated for a in audioSnippets]) or \
                all([a.isComplete for a in audioSnippets]):
            # Note: if the conditional is not met, predictions may be an empty
            # array. Don't run this next line outside of this conditional.
            # (Happens only in a race condition after the audioSnippet is
            # uploaded, and before it adds its first prediction.)
            responses = [a.predictions[-1] for a in audioSnippets]

            # All tasks complete for first time
            totalString = overlap.combineSeveral(responses)
            doc.completeTranscript = totalString
            doc.save()

        return newHITCompleted

    def isTaskReady(self, hitID):
        return len(self.connection.get_assignments(hitID)) > 0

    def approveAllHits(self):
        # Approve hits:
        for assignment in self.getAllAssignments():
            try:
                self.connection.approve_assignment(assignment.AssignmentId)
            except MTurkRequestError as e:
                # Maybe already approved?
                logger.error("MTurk Request Error: " + str(e))

    def checkIfHitsReady(self):
        return True

    def getAllAssignments(self):
        allHits = [hit for hit in self.connection.get_all_hits()]

        # Approve hits:
        for hit in allHits:
            assignments = self.connection.get_assignments(hit.HITId)
            for assignment in assignments:
                yield assignment
Example #13
class ElicitationPipelineHandler(object):
    def __init__(self):
        aws_id = os.environ['AWS_ACCESS_KEY_ID']
        aws_k = os.environ['AWS_ACCESS_KEY']

        try:
            self.conn = MTurkConnection(aws_access_key_id=aws_id,\
                          aws_secret_access_key=aws_k,\
                          host=HOST)
        except Exception as e:
            print(e)

        self.ah = AssignmentHandler(self.conn)
        self.th = TurkerHandler(self.conn)
        self.hh = HitHandler(self.conn, TEMPLATE_DIR)
        self.mh = MongoElicitationHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = self.conn.get_account_balance()[0].amount
        self.batch_cost = 1
        if self.balance > self.batch_cost:
            self.balance = self.batch_cost
        else:
            raise IOError
        self.logger = logging.getLogger(
            "transcription_engine.elicitation_pipeline_handler")

    def load_PromptSource_RawToList(self, prompt_file_uri):
        """Create the prompt artifacts from the source."""
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        disk_space = os.stat(prompt_file_uri).st_size
        source_id = self.mh.create_prompt_source_artifact(
            prompt_file_uri, disk_space, len(prompt_dict))
        normalizer = Normalize()
        for key in prompt_dict:
            prompt, line_number = prompt_dict[key]
            normalized_prompt = normalizer.rm_prompt_normalization(prompt)
            self.mh.create_prompt_artifact(source_id, prompt,
                                           normalized_prompt, line_number, key,
                                           len(prompt))

    def load_assignment_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments",
                                            {"_id": assignment.AssignmentId}):
                        #We create assignments here, so if we already have it, skip
                        continue
                        #pass
                    else:
                        have_all_assignments = False
                    recording_ids = []
                    prompt_id_tag = "prompt_id"
                    recording_url_tag = "recording_url"
                    worker_id_tag = "worker_id"
                    recording_dict = self.ah.get_assignment_submitted_text_dict(
                        assignment, prompt_id_tag, recording_url_tag)
                    worker_oid = self.mh.create_worker_artifact(
                        assignment.WorkerId)
                    zipcode = None
                    for recording in recording_dict:
                        if recording[prompt_id_tag] == "zipcode":
                            zipcode = recording[recording_url_tag]
                            continue
                        if not self.mh.get_artifact_by_id(
                                "prompts", recording[prompt_id_tag]):
                            self.logger.info("Assignment(%s) with unknown %s(%s) skipped"%\
                                        (assignment_id,prompt_id_tag,recording[prompt_id_tag]))
                            break
                        recording_id = self.mh.create_recording_source_artifact(
                            recording[prompt_id_tag],
                            recording[recording_url_tag],
                            recording[worker_id_tag])
                        if not recording_id:
                            self.mh.create_assignment_artifact(assignment,
                                                               recording_ids,
                                                               zipcode=zipcode,
                                                               incomplete=True)
                            break

                        self.mh.add_item_to_artifact_set(
                            "prompts", recording[prompt_id_tag],
                            "recording_sources", recording_id)
                        recording_ids.append(recording_id)
                    else:
                        self.mh.create_assignment_artifact(assignment,
                                                           recording_ids,
                                                           zipcode=zipcode)
                        self.mh.add_item_to_artifact_set(
                            "elicitation_hits", hit_id,
                            "submitted_assignments", assignment_id)
                        self.mh.add_item_to_artifact_set(
                            "workers", worker_oid, "submitted_assignments",
                            assignment_id)
                print("Elicitation HIT(%s) submitted assignments: %s " %
                      (hit_id, assignment_ids))

    def approve_assignment_submitted_to_approved(self):
        """Approve all submitted assignments"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments", {
                            "_id": assignment_id,
                            "state": "Submitted"
                    }):
                        #WARNING: this Approves every assignment
                        self.conn.approve_assignment(
                            assignment_id,
                            "Thank you for completing this assignment!")
                        self.mh.update_artifact_by_id(
                            "elicitation_assignments", assignment_id,
                            "approval_time", datetime.datetime.now())

    def approve_assignment_by_worker(self):
        """Approve all submitted assignments"""
        approval_comment = "Thank you for your recordings, good work, assignment approved!"
        denial_comment = "I'm sorry but your work was denied because %s"
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments", {
                            "_id": assignment_id,
                            "state": "Submitted"
                    }):
                        #WARNING: this Approves every assignment
                        assignment_artifact = self.mh.get_artifact(
                            "elicitation_assignments", {"_id": assignment_id})
                        recording_ids = assignment_artifact["recordings"]
                        worker = self.mh.get_artifact(
                            "workers",
                            {"eid": assignment_artifact["worker_id"]})
                        if worker["state"] == "Approved":
                            #If the worker is approved, approve the assignment automatically
                            self.conn.approve_assignment(
                                assignment_id, approval_comment)
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            continue
                        elif worker["state"] == "Rejected":
                            self.conn.reject_assignment(
                                assignment_id, worker["rejection_reason"])
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            continue
                        recording_uris = []
                        for recording_id in recording_ids:
                            uri = self.mh.get_artifact_by_id(
                                "recording_sources", recording_id,
                                "recording_uri")
                            recording_uris.append(uri)
                        command = ["gnome-mplayer"] + recording_uris
                        #Skip playback when the first recording URL looks malformed
                        if recording_uris and (recording_uris[0].endswith(" .wav") or
                                               recording_uris[0].endswith(".com.wav")):
                            continue
                        print("Calling: %s" % command)
                        call(command)
                        approve_assignment = raw_input(
                            "Approve assignment(y/n/s)?")
                        if approve_assignment == "s":
                            #skip
                            continue
                        elif approve_assignment == "y":
                            #accept the assignment
                            self.conn.approve_assignment(
                                assignment_id, approval_comment)
                            self.mh.update_artifact_by_id(
                                "elicitation_assignments", assignment_id,
                                "approval_time", datetime.datetime.now())
                            approve_worker = raw_input("Approve worker(y/n)?")
                            if approve_worker == "y":
                                #approve the worker and all future assignments
                                self.mh.update_artifact_by_id(
                                    "workers", worker["_id"], "approval_time",
                                    datetime.datetime.now())
                        elif approve_assignment == "n":
                            #Reject the assignment
                            reject_worker = raw_input(
                                "Reject this worker's future work?")
                            if reject_worker == "y":
                                #Reject the worker
                                reason = raw_input(
                                    "Reason for rejecting this worker's future work:"
                                )
                                self.mh.update_artifact_by_id(
                                    "workers", worker["_id"],
                                    "rejection_reason", reason)
                                self.conn.reject_assignment(
                                    assignment_id,
                                    denial_comment % reason + ".")
                            else:
                                reason = raw_input(
                                    "Why reject the assignment?")
                                self.conn.reject_assignment(
                                    assignment_id,
                                    denial_comment % reason + ".")

    def get_assignment_stats(self):
        effective_hourly_wage = self.effective_hourly_wage_for_approved_assignments(
            .20)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""
        approved_assignments = self.mh.get_artifacts_by_state(
            "elicitation_assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                  "%Y-%m-%dT%H:%M:%SZ")
            submitted = datetime.datetime.strptime(assignment["SubmitTime"],
                                                   "%Y-%m-%dT%H:%M:%SZ")
            total += submitted - accepted
            count += 1
            #self.mh.update_artifact_by_id("elicitation_assignments", assignment["_id"], "SubmitTime", completion_time)
        if count == 0:
            return None
        seconds_per_assignment = total.total_seconds() / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))
        return effective_hourly_wage

    def enqueue_prompts_and_generate_hits(self, cost_sensitive=True):
        prompts = self.mh.get_artifacts_by_state("prompts", "New")
        for prompt in prompts:
            self.mh.enqueue_prompt(prompt["_id"], 1, 5)
            prompt_queue = self.mh.get_prompt_queue()
            prompt_pairs = self.mh.get_prompt_pairs(prompt_queue)
            if prompt_pairs:
                hit_title = "Audio Elicitation"
                question_title = "Speak and Record your Voice"
                hit_description = "Speak the prompt and record your voice."
                keywords = "audio, elicitation, speech, recording"
                if cost_sensitive:
                    reward_per_clip = 0.04
                    max_assignments = 2
                    estimated_cost = self.hh.estimate_html_HIT_cost(prompt_pairs,reward_per_clip=reward_per_clip,\
                                                                    max_assignments=max_assignments)
                    prompts_in_hits = self.mh.prompts_already_in_hit(
                        prompt_pairs)
                    if prompts_in_hits:
                        #If one or more clips are already in a HIT, remove it from the queue
                        self.mh.remove_artifact_from_queue(prompts_in_hits)
                    elif self.balance - estimated_cost >= 0:
                        #if we have enough money, create the HIT
                        response = self.hh.make_html_elicitation_HIT(
                            prompt_pairs,
                            hit_title,
                            question_title,
                            keywords,
                            hit_description,
                            max_assignments=max_assignments,
                            reward_per_clip=reward_per_clip)
                        #                         response = self.hh.make_question_form_elicitation_HIT(prompt_pairs,hit_title,
                        #                                                      question_title, keywords)
                        self.balance = self.balance - estimated_cost
                        if type(response) == ResultSet and len(
                                response) == 1 and response[0].IsValid:
                            response = response[0]
                            self.mh.remove_artifacts_from_queue(
                                "prompt_queue", prompt_queue)
                            prompt_ids = [w["prompt_id"] for w in prompt_queue]
                            hit_id = response.HITId
                            hit_type_id = response.HITTypeId
                            self.mh.create_elicitation_hit_artifact(
                                hit_id, hit_type_id, prompt_ids)
                            self.mh.update_artifacts_by_id(
                                "prompts", prompt_ids, "hit_id", hit_id)
                            self.logger.info("Successfully created HIT: %s" %
                                             hit_id)
                    else:
                        return True
        print("Amount left in batch: %s out of %s" %
              (self.balance, self.batch_cost))

    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        selection = raw_input("Remove all hits with no assignments?")
        if selection == "y":
            for hit in hits:
                hit_id = hit.HITId
                assignments = self.conn.get_assignments(hit_id)
                if len(assignments) == 0:
                    try:
                        self.conn.disable_hit(hit_id)
                        prompts = self.mh.get_artifact("elicitation_hits",
                                                       {"_id": hit_id},
                                                       "prompts")
                        self.mh.remove_elicitation_hit(hit_id)
                        if prompts:
                            self.mh.update_artifacts_state("prompts", prompts)
                        else:
                            pass
                    except MTurkRequestError as e:
                        raise e
            return True
        for hit in hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)"
                             ) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        prompts = self.mh.get_artifact("elicitation_hits",
                                                       {"_id": hit_id},
                                                       "prompts")
                        self.mh.remove_elicitation_hit(hit_id)
                        if prompts:
                            self.mh.update_artifacts_state("prompts", prompts)
                        else:
                            pass
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)" %
                             len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e

    def run(self):
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        selection = 0
        #self.get_time_submitted_for_assignments()
        while selection != "8":
            selection = raw_input(
                """Prompt Source raw to Elicitations-Approved Pipeline:\n
                                     1: PromptSource-Load_RawToList: Load Resource Management 1 prompt source files to queueable prompts
                                     2: Prompt-ReferencedToHit: Queue all referenced prompts and create a HIT if the queue is full.
                                     3: Prompt-HitToAssignmentSubmitted: Check all submitted assignments for Elicitations and download elicitations.
                                     4: Maintain all assignments and hits.
                                     5: (WARNING, approves all assignments) Approve all submitted assignments.
                                     6: Calculate assignment stats.
                                     7: Hand approve submitted assignments by elicitation and/or by worker. 
                                     8: Exit
                                    """)
            if selection == "1":
                self.load_PromptSource_RawToList(prompt_file_uri)
            elif selection == "2":
                self.enqueue_prompts_and_generate_hits()
            elif selection == "3":
                self.load_assignment_hit_to_submitted()
            elif selection == "4":
                self.allhits_liveness()
            elif selection == "5":
                self.approve_assignment_submitted_to_approved()
            elif selection == "6":
                self.get_assignment_stats()
            elif selection == "7":
                self.approve_assignment_by_worker()
            else:
                selection = "8"


#    prompt_dict = self.ph.get_prompts(prompt_file_uri)

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("elicitation_assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("elicitation_assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
class TranscriptionPipelineHandler():
    def __init__(self):
        aws_id = os.environ['AWS_ACCESS_KEY_ID']
        aws_k = os.environ['AWS_ACCESS_KEY']

        self.conn = MTurkConnection(aws_access_key_id=aws_id,\
                          aws_secret_access_key=aws_k,\
                          host=HOST)
        
        self.ah = AssignmentHandler(self.conn)
        self.th = TurkerHandler(self.conn)
        self.hh = HitHandler(self.conn,TEMPLATE_DIR)
        self.mh = MongoTranscriptionHandler()
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = self.conn.get_account_balance()[0].amount
        self.logger = logging.getLogger("transcription_engine.transcription_pipeline_handler")
        
    def audio_clip_referenced_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts_by_state("audio_clips","Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts("audio_clips",{"state":"Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    
    def audio_clip_queue_to_hit(self,cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe" 
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(clip_pairs,reward_per_clip,max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove it from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(clip_pairs,hit_title,
                                                 question_title, description, keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [w["audio_clip_id"] for w in clip_queue]    
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(hit_id,hit_type_id,clip_queue,"New")        
                        self.logger.info("Successfully created HIT: %s"%hit_id)
                        return self.mh.update_audio_clips_state(audio_clip_ids,"Hit")
                else:
                    pass
        return False
            
    def load_assignments_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)  
                if self.mh.get_artifact("assignments",{"_id":assignment.AssignmentId}):
                    #We create assignments here, so if we already have it, skip
                    continue   
                else:
                    have_all_assignments = False                                         
                transcription_ids = []                
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(assignment)   
                if transcription_dicts and len(transcription_dicts)==10:
                    pass             
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"]): 
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        break 
                    self.mh.update_transcription_state(transcription,"Submitted")
                    self.mh.update_audio_clips_state([transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(self.mh.get_artifact("transcriptions",{"audio_clip_id" : transcription["audio_clip_id"],
                                                                        "assignment_id" : transcription["assignment_id"]},
                                                                       "_id"))
                else:
                    self.mh.create_assignment_artifact(assignment,
                                                   transcription_ids,
                                                   "Submitted")
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id,"Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s "%(hit_id,assignment_ids))
            
    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments", "Submitted")
        rejected_feedback = "I'm sorry but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions " +\
                            " had a word error rate above the maximum acceptable"+\
                            " word error rate of %s. Omitted words and words that "+\
                            " differed by more than %s "+\
                            " characters were counted as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions","_id",transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)
            
            approved, average_wer  = self.filter.approve_assignment(transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(assignment_id, accepted_feedback%(assignment_id,average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment,"Approved")    
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"],"reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(transcription,"Approved")                                          
                    print("Approved transcription ids: %s"%transcription_ids)
            else:
                #Don't deny for now
                feedback = rejected_feedback%(assignment_id,self.filter.WER_THRESHOLD,self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id,feedback)
                self.mh.update_assignment_state(assignment,"Denied")    
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
            
    def _load_rm_audio_source_file_to_clipped(self,file_dir,prompt_file_uri,
                                                   base_clip_dir,sample_rate=16000,
                                                   http_base_url = "http://www.cis.upenn.edu/~tturpen/wavs/",
                                                   init_clip_count = 200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root,f)
                out_uri = os.path.splitext(system_uri)[0] + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root,(out_uri))
                spkr_id = str(os.path.relpath(root,file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri,out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: "+str(e))
                        
                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.splitext(os.path.basename(out_uri))[0].upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:                        
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(out_uri,
                                                         disk_space,
                                                         length_seconds,
                                                         sample_rate,
                                                         spkr_id,
                                                         encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir,spkr_id,wav_filename)                    
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri,audio_clip_uri)     
                    #http_url
                    http_url = os.path.join(http_base_url,spkr_id,wav_filename)                   
                    clip_id = self.mh.create_audio_clip_artifact(source_id,
                                                       0,
                                                       -1,
                                                       audio_clip_uri,
                                                       http_url,
                                                       length_seconds,
                                                       disk_space)
                    
                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id,clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(clip_id,
                                                                                       transcription_prompt,
                                                                                       "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(clip_id,transcription_id)                    
        
    def all_workers_liveness(self):
        workers = self.mh.get_all_workers()
        for worker in workers:
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)"%(worker["_id"],approved,denied))
            selection = input("1. Show denied transcriptions and references.\n"+
                                    "2. Show accepted transcriptions and references.\n"+
                                    "3. Show both denied and accepted transcriptions.")
            if selection == 2 or selection == 3:
                print("Approved transcriptions")
                for assignment_id in approved:
                    transcription_pairs = self.mh.get_transcription_pairs(assignment_id)
                    for pair in transcription_pairs:
                        print ("Reference:\n\t%s\nHypothesis:\n\t%s\n"%(pair[0],pair[1]))
            if selection == 1 or selection == 3:
                print("Denied transcriptions")
                for assignment_id in denied:
                    transcription_pairs = self.mh.get_transcription_pairs(assignment_id)
                    for pair in transcription_pairs:
                        print ("Reference:\n\t%s\nHypothesis:\n\t%s\n"%(pair[0],pair[1]))
            
    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved: 
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer/len(approved)
                print("%s,%s"%(len(approved),worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment/total_accepted
        print("Average WER per assignment(%s)"%(av))
        
    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)                    
    
    def effective_hourly_wage_for_approved_assignments(self,reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""        
        approved_assignments = self.mh.get_artifacts_by_state("assignments","Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],"%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(assignment["SubmitTime"],"%Y-%m-%dT%H:%M:%SZ")
            else:
                pass
            total += submitted-accepted
            count += 1
        seconds_per_assignment = total.total_seconds()/count
        effective_hourly_wage = 60.0*60.0/seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s"%(seconds_per_assignment,reward_per_assignment,effective_hourly_wage))        
        
    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        for hit in hits:
            hit_id = hit.HITId            
            print("HIT ID: %s"%hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)") == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        clips = self.mh.get_artifact("transcription_hits",{"_id": hit_id},"clips")
                        self.mh.remove_transcription_hit(hit_id)
                        self.mh.update_audio_clips_state(clips, "Referenced")
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)"%len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e
                    
    def run(self):
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        selection = 0
        init_clip_count = 10000
        while selection != "11":
            selection = raw_input("""Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """%init_clip_count)
            #selection = "5"
            if selection == "1":
                self._load_rm_audio_source_file_to_clipped(audio_file_dir,
                                                       prompt_file_uri,
                                                       base_clip_dir,init_clip_count=init_clip_count)
            elif selection == "2":
                self.audio_clip_referenced_to_hit()
            elif selection == "3":
                self.load_assignments_hit_to_submitted()
            elif selection == "4":
                self.assignment_submitted_approved()
            elif selection == "5":
                self.allhits_liveness()
            elif selection == "6":
                self.all_workers_liveness()
            elif selection == "7":
                print("Account balance: %s"%self.balance)
            elif selection == "8":
                self.stats()
            elif selection == "9":
                self.recalculate_worker_assignment_wer()
            elif selection == "10":
                self.get_assignment_stats()

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
                    
#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})        
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#             
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
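
The wage calculation used by both pipeline handlers above (summing the SubmitTime minus AcceptTime deltas, dividing by the assignment count, then scaling by the reward) is easy to sanity-check in isolation. Below is a minimal, self-contained sketch; the function name and the sample timestamps are illustrative and not part of the pipeline code.

import datetime

def effective_hourly_wage(assignments, reward_per_assignment):
    """Average completion time and implied hourly wage for assignment dicts
    carrying MTurk-style AcceptTime/SubmitTime strings."""
    total = datetime.timedelta(0)
    count = 0
    for assignment in assignments:
        if "AcceptTime" not in assignment or "SubmitTime" not in assignment:
            continue  #skip artifacts stored before times were recorded
        accepted = datetime.datetime.strptime(assignment["AcceptTime"], "%Y-%m-%dT%H:%M:%SZ")
        submitted = datetime.datetime.strptime(assignment["SubmitTime"], "%Y-%m-%dT%H:%M:%SZ")
        total += submitted - accepted
        count += 1
    if count == 0:
        return None
    seconds_per_assignment = total.total_seconds() / count
    return 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment

#Example: two assignments taking 5 and 7 minutes at $0.20 each -> $2.00/hour
sample = [
    {"AcceptTime": "2014-01-01T10:00:00Z", "SubmitTime": "2014-01-01T10:05:00Z"},
    {"AcceptTime": "2014-01-01T10:00:00Z", "SubmitTime": "2014-01-01T10:07:00Z"},
]
print(effective_hourly_wage(sample, 0.20))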
Example #15
0
class MturkHelper(object):
    """
		This class handles task creation for the Amazon Mechanical Turk service.

		Amazon MTurk is used to crowdsource matching products.

		Initialisation :
			- reference : reference of the product
			- osm_from : the origin osm of a product
			- osm_to : the osm to look into
	"""
    if settings.SANDBOX:
        AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
        AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
    else:
        AWS_SECRET_ACCESS_KEY = 'e6/8e5lcCcESPKT/fe6kYkJtf0+7F2w7459WTJ0v'
        AWS_ACCESS_KEY_ID = 'AKIAIP5JQO7FQX6Q7JAQ'

    def __init__(self,
                 reference=None,
                 osm_from=None,
                 osm_to=None,
                 key=None,
                 hitid=None):
        self.reference = reference
        self.osm_from = osm_from
        self.osm_to = osm_to
        self.key = key
        self.hitid = hitid
        if key is None:
            self.task = None
        else:
            self.task = self.get_task()

        self.mtc = MTurkConnection(
            aws_access_key_id=MturkHelper.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=MturkHelper.AWS_SECRET_ACCESS_KEY,
            host=settings.HOST)

    def get_all_reviewable_hits(self):
        page_size = 50
        hits = self.mtc.get_reviewable_hits(page_size=page_size)
        print "Total results to fetch %s " % hits.TotalNumResults
        print "Request hits page %i" % 1
        total_pages = float(hits.TotalNumResults) / page_size
        int_total = int(total_pages)
        if (total_pages - int_total > 0):
            total_pages = int_total + 1
        else:
            total_pages = int_total
        pn = 1
        while pn < total_pages:
            pn = pn + 1
            print "Request hits page %i" % pn
            temp_hits = self.mtc.get_reviewable_hits(page_size=page_size,
                                                     page_number=pn)
            hits.extend(temp_hits)

        return hits

    def get_hits(self, validate=False, all_hits=False):
        if not all_hits:
            hits = self.get_all_reviewable_hits()
        else:
            hits = self.mtc.get_all_hits()
        for hit in hits:
            print "####################"
            print "--------------------"
            print "HitId = %s" % (hit.HITId)
            assignments = self.mtc.get_assignments(hit.HITId)
            # Getting task associated to hit
            task = Task.objects.filter(hitId=hit.HITId)
            print 'Number of corresponding tasks = %d' % len(task)
            if len(task) > 0:
                task = task[0]
            else:
                task = None

            for assignment in assignments:
                print "AssignmentId = %s" % (assignment.AssignmentId)
                print "Answers of the worker %s" % assignment.WorkerId
                for question_form_answer in assignment.answers[0]:
                    qid = question_form_answer.qid
                    if qid == 'flagged':
                        for value in question_form_answer.fields:
                            # Saving resultTask
                            if task is not None:
                                print 'Saving result task, result = %s' % (
                                    value)
                                resulttask, created = ResultTask.objects.get_or_create(
                                    task=task,
                                    assignementId=assignment.AssignmentId,
                                    workerId=assignment.WorkerId)
                                resulttask.reference = value
                                resulttask.save()
                            elif validate:
                                try:
                                    self.mtc.approve_assignment(
                                        assignment.AssignmentId)
                                except Exception, e:
                                    print e
            try:
                if validate:
                    self.mtc.disable_hit(hit.HITId)
            except Exception, e:
                print e

                print "--------------------"
Example #16
0
#!/home/dave/anaconda2/bin/python

import sys

sys.path.append(
    '/home/dave/OneDrive/Research/By Project/Dissertation/experiments/private/'
)

from boto.mturk.connection import MTurkConnection

from awsKeys import aws_access_key_id
from awsKeys import aws_secret_access_key

HOST = 'mechanicalturk.sandbox.amazonaws.com'  # Use this to post to the sandbox instead

mtc = MTurkConnection(aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key,
                      host=HOST)

hit_ids = []

for h in mtc.get_all_hits():
    hit_ids.append(h.HITId)

for h in hit_ids:
    mtc.disable_hit(h)
    print mtc.get_hit(h)[0].HITId + ': ' + mtc.get_hit(h)[0].HITStatus
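
If the goal is only to stop new submissions rather than remove the HITs outright, boto's MTurkConnection also exposes expire_hit, which in the legacy API leaves already-submitted assignments reviewable. A minimal sketch under the same assumptions as the script above (same awsKeys module and sandbox host); it is illustrative, not part of the original example.

from boto.mturk.connection import MTurkConnection

from awsKeys import aws_access_key_id
from awsKeys import aws_secret_access_key

HOST = 'mechanicalturk.sandbox.amazonaws.com'

mtc = MTurkConnection(aws_access_key_id=aws_access_key_id,
                      aws_secret_access_key=aws_secret_access_key,
                      host=HOST)

for h in mtc.get_all_hits():
    mtc.expire_hit(h.HITId)  #stop accepting new assignments, keep submitted work
    print(h.HITId + ' expired')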
Example #17
0
def get_all_reviewable_hits(mtc):
    page_size = 50
    hits = mtc.get_reviewable_hits(page_size=page_size)
    print "Total results to fetch %s " % hits.TotalNumResults
    print "Request hits page %i" % 1
    total_pages = float(hits.TotalNumResults)/page_size
    int_total= int(total_pages)
    if(total_pages-int_total>0):
        total_pages = int_total+1
    else:
        total_pages = int_total
    pn = 1
    while pn < total_pages:
        pn = pn + 1
        print "Request hits page %i" % pn
        temp_hits = mtc.get_reviewable_hits(page_size=page_size,page_number=pn)
        hits.extend(temp_hits)
    return hits
 
mtc = MTurkConnection(aws_access_key_id='llllllllllllllllllllllllllllllllllllll',
                      aws_secret_access_key='oooooooooooooooooooooooooooooooooooo',
                      host='mechanicalturk.sandbox.amazonaws.com')
 
 
hits = get_all_reviewable_hits(mtc)

for hit in hits:
    assignments = mtc.get_assignments(hit.HITId)
    mtc.disable_hit(hit.HITId, response_groups=None)
    for assignment in assignments:
        print "Answers of the worker %s" % assignment.WorkerId
        for question_form_answer in assignment.answers:
            for  element in question_form_answer:
                for value in  element.fields:
                    print "%s" % (value)
        print "------------------------------------------------"

Example #18
0
def cleanup():
    """Remove any boto test related HIT's"""

    conn = MTurkConnection(host='mechanicalturk.sandbox.amazonaws.com')
    current_page = 1
    page_size = 10
    total_disabled = 0
    ignored = []

    while True:
        # reset the total for this loop
        disabled_count = 0

        # search all the hits in the sandbox
        search_rs = conn.search_hits(page_size=page_size, page_number=current_page)

        # success?
        if search_rs.status:
            for hit in search_rs:
                # delete any with Boto in the description
                print 'hit id:%s Status:%s, desc:%s' %(hit.HITId, hit.HITStatus, hit.Description)
                if hit.Description.find('Boto') != -1:
                    if hit.HITStatus != 'Reviewable':
                        print 'Disabling hit id:%s %s' %(hit.HITId, hit.Description)
                        disable_rs = conn.disable_hit(hit.HITId)
                        if disable_rs.status:
                            disabled_count += 1
                            # update the running total
                            total_disabled += 1
                        else:
                            print 'Error when disabling, code:%s, message:%s' %(disable_rs.Code, disable_rs.Message)
                    else:
                        print 'Disposing hit id:%s %s' %(hit.HITId, hit.Description)
                        dispose_rs = conn.dispose_hit(hit.HITId)
                        if dispose_rs.status:
                            disabled_count += 1
                            # update the running total
                            total_disabled += 1
                        else:
                            print 'Error when disposing, code:%s, message:%s' %(dispose_rs.Code, dispose_rs.Message)

                else:
                    if hit.HITId not in ignored:
                        print 'ignored:%s' %hit.HITId
                        ignored.append(hit.HITId)

            # any more results?
            if int(search_rs.TotalNumResults) > current_page*page_size:
                # if we have disabled any HITs on this page
                # then we don't need to go to a new page
                # otherwise we do
                if not disabled_count:
                    current_page += 1
            else:
                # no, we're done
                break
        else:
            print 'Error performing search, code:%s, message:%s' %(search_rs.Code, search_rs.Message)
            break

    total_ignored = len(ignored)
    print 'Processed: %d HITs, disabled/disposed: %d, ignored: %d' %(total_ignored + total_disabled, total_disabled, total_ignored)
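
A thin entry point (hypothetical, not part of the original snippet) makes the cleanup routine runnable as a script. Note that the loop above only advances current_page when nothing was disabled on that page, because removing HITs shifts the remaining search results forward.

if __name__ == '__main__':
    cleanup()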
Example #19
0
class MturkHelper(object):
	"""
		This class handles task creation for the Amazon Mechanical Turk service.

		Amazon MTurk is used to crowdsource matching products.

		Initialisation :
			- reference : reference of the product
			- osm_from : the origin osm of a product
			- osm_to : the osm to look into
	"""
	if settings.SANDBOX:
		AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
		AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
	else:
		AWS_SECRET_ACCESS_KEY = 'e6/8e5lcCcESPKT/fe6kYkJtf0+7F2w7459WTJ0v'
		AWS_ACCESS_KEY_ID = 'AKIAIP5JQO7FQX6Q7JAQ'


	def __init__(self, reference = None, osm_from = None, osm_to = None, key = None, hitid = None):
		self.reference = reference
		self.osm_from = osm_from
		self.osm_to = osm_to
		self.key = key
		self.hitid = hitid
		if key is None:
			self.task = None
		else:
			self.task = self.get_task()

		self.mtc = MTurkConnection(aws_access_key_id=MturkHelper.AWS_ACCESS_KEY_ID,
									aws_secret_access_key=MturkHelper.AWS_SECRET_ACCESS_KEY,
									host=settings.HOST)

	def get_all_reviewable_hits(self):
		page_size = 50
		hits = self.mtc.get_reviewable_hits(page_size=page_size)
		print "Total results to fetch %s " % hits.TotalNumResults
		print "Request hits page %i" % 1
		total_pages = float(hits.TotalNumResults)/page_size
		int_total= int(total_pages)
		if(total_pages-int_total>0):
			total_pages = int_total+1
		else:
			total_pages = int_total
		pn = 1
		while pn < total_pages:
			pn = pn + 1
			print "Request hits page %i" % pn
			temp_hits = self.mtc.get_reviewable_hits(page_size=page_size,page_number=pn)
			hits.extend(temp_hits)

		return hits

	def get_hits(self, validate = False, all_hits = False):
		if not all_hits:
			hits = self.get_all_reviewable_hits()
		else:
			hits = self.mtc.get_all_hits()
		for hit in hits:
			print "####################"
			print "--------------------"
			print "HitId = %s"%(hit.HITId)
			assignments = self.mtc.get_assignments(hit.HITId)
			# Getting task associated to hit
			task = Task.objects.filter(hitId = hit.HITId)
			print 'Number of corresponding tasks = %d'%len(task)
			if len(task)>0:
				task = task[0]
			else:
				task = None

			for assignment in assignments:
				print "AssignmentId = %s"%(assignment.AssignmentId)
				print "Answers of the worker %s" % assignment.WorkerId
				for question_form_answer in assignment.answers[0]:
					qid = question_form_answer.qid
					if qid == 'flagged':
						for value in question_form_answer.fields:
							# Saving resultTask
							if task is not None:
								print 'Saving result task, result = %s'%(value)
								resulttask, created = ResultTask.objects.get_or_create(task = task, assignementId = assignment.AssignmentId, workerId = assignment.WorkerId)
								resulttask.reference = value
								resulttask.save()
							elif validate:
								try:
									self.mtc.approve_assignment(assignment.AssignmentId)
								except Exception, e:
									print e
			try:
				if validate:
					self.mtc.disable_hit(hit.HITId)
			except Exception, e:
				print e
							
				print "--------------------"
Example #20
0
class Mturk():
    def __init__(self):
        self.config = self.set_config()
        self.mturk = MTurkConnection(
            aws_access_key_id=self.config['aws_access_key_id'],
            aws_secret_access_key=self.config['aws_secret_access_key'],
            host=self.config['host'])
        self.mturk_tmpl = MturkTmpl()

    def set_config(self, config_path="config.yml"):
        with open(config_path, 'r') as file:
            config = yaml.load(file)
        return config

    def account_balance(self):
        account_balance = self.mturk.get_account_balance()
        print("Testing connection: You have a balance of: {}".format(
            account_balance))

    def get_hits(self):
        return self.mturk.get_all_hits()

    def get_all_assignments(self, hit_id):
        page_size = 100
        assignments = self.mturk.get_assignments(hit_id, page_size=page_size)
        total_records = int(assignments.TotalNumResults)
        get_page_assignments = lambda page: self.mturk.get_assignments(
            hit_id, page_size=page_size, page_number=page)
        page_nums = self.mturk._get_pages(page_size=page_size,
                                          total_records=total_records)
        assignments_sets = itertools.imap(get_page_assignments, page_nums)
        return itertools.chain.from_iterable(assignments_sets)

    def remove_old_hits(self):
        # Disable old hits.
        for hit in self.get_hits():
            print("Hit {} has been removed.".format(hit.HITId))
            self.mturk.disable_hit(hit.HITId)

    def cal_reward(self, data):
        read_instruction = 3.0
        word_count = len(data['ents']) * 1 / 30.0
        return round((read_instruction + word_count) / 60.0 * 6.0, 2)

    def create_hit(self, data):
        # These parameters define the HIT that will be created
        # question is what we defined above
        # max_assignments is the # of unique Workers you're requesting
        # title, description, and keywords help Workers find your HIT
        # duration is the # of seconds Workers have to complete your HIT
        # reward is what Workers will be paid when you approve their work
        # Check out the documentation on CreateHIT for more details
        response = self.mturk.create_hit(
            question=self.mturk_tmpl.html_question(data),
            max_assignments=1,
            title=self.config['title'],
            description=self.config['description'],
            keywords=self.config['keywords'],
            duration=120,
            reward=self.cal_reward(data))
        return response
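
A hypothetical driver for the Mturk wrapper above (not part of the original snippet); it assumes config.yml supplies the keys read in set_config, including title, description and keywords used by create_hit.

mturk = Mturk()
mturk.account_balance()

#Walk every HIT and its assignments using the paginated helper above.
for hit in mturk.get_hits():
    for assignment in mturk.get_all_assignments(hit.HITId):
        print("HIT {} answered by worker {}".format(hit.HITId, assignment.WorkerId))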
Example #21
0
for i in xrange(NUMBEROFWORKERPOOLS):
    f = open('log/aql' + str(i),'w')
    f.write('')
    f.close()
    chmod('log/aql' +str(i),0o777)


# f = open('log/aql0', 'w')
# f.write('')
# f.close()
# chmod('log/aql0', 0o777)
#
# f = open('log/aql1', 'w')
# f.write('')
# f.close()
# chmod('log/aql1', 0o777)


if not SIMULATION:
    if SANDBOX:
        mturk = MTurkConnection(AWSAKID,
                                AWSSAK,
                                host='mechanicalturk.sandbox.amazonaws.com')
    else:
        mturk = MTurkConnection(AWSAKID,
                                AWSSAK,
                                host='mechanicalturk.amazonaws.com')

    for hit in mturk.get_all_hits():
        mturk.disable_hit(hit.HITId)
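
The sandbox/production branching above can be folded into a small helper so other scripts reuse the same host selection; a sketch with an illustrative helper name.

from boto.mturk.connection import MTurkConnection

def make_mturk_connection(sandbox, access_key_id, secret_access_key):
    #Same host choice as the branch above, parameterised on the SANDBOX flag.
    host = ('mechanicalturk.sandbox.amazonaws.com' if sandbox
            else 'mechanicalturk.amazonaws.com')
    return MTurkConnection(access_key_id, secret_access_key, host=host)

#e.g. mturk = make_mturk_connection(SANDBOX, AWSAKID, AWSSAK)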
Example #22
0
    print "Request hits page %i" % 1
    total_pages = float(hits.TotalNumResults)/page_size
    int_total= int(total_pages)
    if(total_pages-int_total>0):
        total_pages = int_total+1
    else:
        total_pages = int_total
    pn = 1
    while pn < total_pages:
        pn = pn + 1
        print "Request hits page %i" % pn
        temp_hits = mtc.get_reviewable_hits(page_size=page_size,page_number=pn)
        hits.extend(temp_hits)
    return hits
 
mtc = MTurkConnection(aws_access_key_id='SSSSSSSSSSSSSSSSSSSSSS',
                      aws_secret_access_key='Vkkkkkkkkkkkkkkkkkkkkkkkkkkk',
                      host='mechanicalturk.sandbox.amazonaws.com')
 
 
hits = get_all_reviewable_hits(mtc)


#expire a HIT
for hit in hits:
    print (hit)
    mtc.disable_hit(hit.HITId, response_groups=None)
else:
    print ("Successfully disabled all the reviewable HITs")

Example #23
0
    host=HOST)

url = "https://mturk-poc.herokuapp.com/"
title = "Describe this group of people in your own words"
description = "Describe your first impressions of this group of people however you want."
keywords = ["easy"]
frame_height = 800
amount = 0.05

questionform = ExternalQuestion(url, frame_height)

all_hits = [hit for hit in connection.get_all_hits()]

if all_hits:
  for hit in all_hits:
    connection.disable_hit(hit.HITId)

create_hit_result = connection.create_hit(
  title=title,
  description=description,
  keywords=keywords,
  max_assignments=4,
  lifetime=datetime.timedelta(hours=2),
  question=questionform,
  reward=Price(amount=amount),
  response_groups=('Minimal', 'HITDetail'),
  )

all_hits = [hit for hit in connection.get_all_hits()]

for hit in all_hits:
Example #24
0
def processHITs(verbose=True,
                approveAll=False,
                deleteAll=False,
                insertComparisons=False):

    mtc = MTurkConnection(host=_host)
    hits = getReviewableHITs(verbose)
    # store hit info here, for persistence
    _hits_vector = []
    _rejected_hits = []
    _flagged_hits = []
    # stats variables
    worker_ids = set()

    for hit in hits:
        assignments = mtc.get_assignments(hit.HITId, page_size=50)
        for assignment in assignments:
            worker_ids.add(assignment.WorkerId)
            if verbose:
                print "Answers of the worker: [%s]" % assignment.WorkerId

            _worker_id = ''
            _worker_exp = 0
            _hit_id = 0
            _assignment_id = ''
            _gui_rating = ''
            _hit_comment = ''
            _hit_rt = 0
            _hit_it = 0
            _trials_results = ''
            _hit_interactions_str = ''
            _hit_reject_flag = False
            _hit_flag = False

            for question_form_answer in assignment.answers[0]:
                key = question_form_answer.qid
                value = question_form_answer.fields

                if key == '_worker_id':
                    _worker_id = value[0]
                    if verbose:
                        print " - Worker ID: [%s]" % (_worker_id)
                elif key == '_worker_exp':
                    _worker_exp = int(value[0])
                    if verbose:
                        print " - Worker experience: [%d]" % (_worker_exp)
                elif key == '_hit_id':
                    _hit_id = int(value[0])
                    if verbose:
                        print " - HIT ID: [%d]" % (_hit_id)
                elif key == '_assignment_id':
                    _assignment_id = value[0]
                    if verbose:
                        print " - Assignment ID: [%s]" % (_assignment_id)
                elif key == '_gui_rating':
                    _gui_rating = value[0]
                    try:
                        _gui_rating = int(_gui_rating)
                    except ValueError:
                        _gui_rating = -1
                    if verbose:
                        print " - GUI rating: [%d/10]" % (_gui_rating)
                elif key == '_hit_comment':
                    _hit_comment = value[0]
                    if verbose:
                        print " - HIT comment: [%s]" % (_hit_comment)
                elif key == '_hit_rt':
                    _hit_rt = int(value[0])
                    if verbose:
                        print " - HIT response time: [%d]" % (_hit_rt)
                elif key == '_hit_it':
                    _hit_it = int(value[0])
                    if verbose:
                        print " - HIT instruction time: [%d]" % (_hit_it)
                elif key == '_trials_results':
                    _trials_results = value[0]
                    if verbose:
                        print " - All HIT's trials results: [%s]" % (
                            _trials_results)
                elif key == '_hit_interactions_str':
                    _hit_interactions_str = value[0]
                    if verbose:
                        print " - HIT interactions string: [%s]" % (
                            _hit_interactions_str)
                elif key == '_hit_reject_flag':
                    _hit_reject_flag = value[0]
                    if str(_hit_reject_flag) == 'false':
                        _hit_reject_flag = False
                    else:
                        _hit_reject_flag = True
                    if verbose:
                        print " - HIT reject flag: [%s]" % (
                            str(_hit_reject_flag))
                elif key == '_hit_flag':
                    _hit_flag = value[0]
                    if _hit_flag == 'Yes':
                        _hit_flag = True
                    else:
                        _hit_flag = False
                    if verbose:
                        print " - HIT information flag: [%s]" % (
                            str(_hit_flag))
                else:
                    print "<----------------------------->"
                    print "ERROR: unknown key [%r]" % (key, )
                    print "Relevant info:"
                    pprint(vars(assignment))
                    pprint(vars(question_form_answer))
                    print "Exiting..."
                    print "<----------------------------->"
                    return

            # if insertComparisons:
            #     pass  # insert the comparisons into the database

            _hit_data = assignment.__dict__.copy()
            del _hit_data['answers']

            _hit_data['_worker_id'] = _worker_id
            _hit_data['_worker_exp'] = _worker_exp
            _hit_data['_hit_id'] = _hit_id
            _hit_data['_assignment_id'] = _assignment_id
            _hit_data['_gui_rating'] = _gui_rating
            _hit_data['_hit_comment'] = _hit_comment
            _hit_data['_hit_rt'] = _hit_rt
            _hit_data['_hit_it'] = _hit_it
            _hit_data['_trials_results'] = _trials_results
            _hit_data['_hit_interactions_str'] = _hit_interactions_str
            _hit_data['_hit_reject_flag'] = _hit_reject_flag
            _hit_data['_hit_flag'] = _hit_flag

            _hits_vector.append(_hit_data)

            if _hit_reject_flag:
                _rejected_hits.append(_hit_data)
                print "<----------------------------->"
                print "This HIT is low quality - Will be rejected."
                print "Relevant info:"
                pprint(vars(assignment))
                for question_form_answer in assignment.answers[0]:
                    pprint(vars(question_form_answer))
                print "<----------------------------->"
                try:
                    mtc.reject_assignment(assignment.AssignmentId)
                except MTurkRequestError:
                    print "Could not reject [%s]" % (assignment.AssignmentId)
            else:
                if _hit_flag:
                    _flagged_hits.append(_hit_data)
                    print "<----------------------------->"
                    print "This HIT has been flagged by turker."
                    print "Relevant info:"
                    pprint(vars(assignment))
                    for question_form_answer in assignment.answers[0]:
                        pprint(vars(question_form_answer))
                    print "<----------------------------->"

                if approveAll:
                    try:
                        mtc.approve_assignment(assignment.AssignmentId)
                    except MTurkRequestError:
                        print "Could not approve [%s]" % (
                            assignment.AssignmentId)
            if verbose:
                print "<----------------------------->"

            if deleteAll:
                mtc.disable_hit(hit.HITId)

    # print out some stats
    print "Number of HITs = [%d]" % (len(_hits_vector), )
    print "Number of distinct workers = [%d]" % (len(worker_ids), )
    print "Number of rejected HITs = [%d]" % (len(_rejected_hits), )
    print "Number of flagged HITs = [%d]" % (len(_flagged_hits), )

    return_dict = {
        "_all_hits": _hits_vector,
        "_rejected_hits": _rejected_hits,
        "_flagged_hits": _flagged_hits
    }

    if 'MTURK_STORAGE_PATH' in os.environ:
        time_stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
        hit_name = "completed_cocoa_5000"
        filename = os.path.join(os.environ['MTURK_STORAGE_PATH'],
                                hit_name + '_' + time_stamp + ".pkl")
        print "Storing collected hit data at %s" % (filename)
        with open(filename, 'wb') as f:
            pickle.dump(return_dict, f)
    else:
        print "WARNING: MTURK_STORAGE_PATH not set in env. Unable to save hit data."

    return return_dict
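# A hedged usage sketch for processHITs above, assuming the surrounding module
# defines _host and getReviewableHITs as the fragment implies and that the run
# completes without hitting the unknown-key early return.
results = processHITs(verbose=False, approveAll=True)
print "Collected %d assignments, rejected %d, flagged %d" % (
    len(results['_all_hits']),
    len(results['_rejected_hits']),
    len(results['_flagged_hits']))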
Пример #25
0
        total_pages = int_total + 1
    else:
        total_pages = int_total
    pn = 1
    while pn < total_pages:
        pn = pn + 1
        print "Request hits page %i" % pn
        temp_hits = mtc.get_reviewable_hits(page_size=page_size,
                                            page_number=pn)
        hits.extend(temp_hits)
    return hits


mtc = MTurkConnection(
    aws_access_key_id='llllllllllllllllllllllllllllllllllllll',
    aws_secret_access_key='oooooooooooooooooooooooooooooooooooo',
    host='mechanicalturk.sandbox.amazonaws.com')

hits = get_all_reviewable_hits(mtc)

for hit in hits:
    assignments = mtc.get_assignments(hit.HITId)
    mtc.disable_hit(hit.HITId, response_groups=None)
    for assignment in assignments:
        print "Answers of the worker %s" % assignment.WorkerId
        for question_form_answer in assignment.answers:
            for element in question_form_answer:
                for value in element.fields:
                    print "%s" % (value)
        print "------------------------------------------------"
class MTurkProvider(object):
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    keywords = ['daemo']
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST
        )
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in ['location', 'approval_rate', 'total_tasks']:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [val.strip() for val in item.expression['value'] if
                                                          val is not None and val != ''])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(project.qualification)
        boomerang_qual, success = self.create_qualification_type(owner_id=project.owner_id,
                                                                 project_id=project.group_id,
                                                                 name='Boomerang Score #{}'.format(project.group_id),
                                                                 flag=FLAG_Q_BOOMERANG,
                                                                 description='No description available')
        boomerang = None
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:

                    boomerang_blacklist, success = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(project.group_id, len(
                                                           WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if success and add_boomerang:
                        boomerang = BoomerangRequirement(qualification_type_id=boomerang_blacklist.type_id,
                                                         comparator=OP_DNE,
                                                         integer_value=None)
                        requirements.append(boomerang)

        else:
            boomerang = BoomerangRequirement(qualification_type_id=boomerang_qual.type_id, comparator=OP_GTEQ,
                                             integer_value=boomerang_threshold)
            if success and add_boomerang:
                requirements.append(boomerang)
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        # if project.min_rating > 0:
        #     return 'NOOP'
        if not tasks:
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(query, {'skipped': TaskWorker.STATUS_SKIPPED,
                                   'rejected': TaskWorker.STATUS_REJECTED,
                                   'expired': TaskWorker.STATUS_EXPIRED,
                                   'project_id': project.id})
            tasks = cursor.fetchall()

        rated_workers = Rating.objects.filter(origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        duration = project.timeout if project.timeout is not None else datetime.timedelta(hours=24)
        lifetime = project.deadline - timezone.now() if project.deadline is not None else datetime.timedelta(
            days=7)

        for task in tasks:
            question = self.create_external_question(task[0])
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            qualifications, boomerang_qual = self.get_qualifications(project=project,
                                                                     boomerang_threshold=int(
                                                                         round(task[4], 2) * 100),
                                                                     add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(title=project.name, description=self.description,
                                                     price=project.price,
                                                     duration=duration, keywords=self.keywords,
                                                     approval_delay=datetime.timedelta(days=2),
                                                     qual_req=qualifications,
                                                     qualifications_mask=qualifications_mask,
                                                     boomerang_threshold=int(round(task[4], 2) * 100),
                                                     owner_id=project.owner_id, boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                try:
                    hit = self.connection.create_hit(hit_type=hit_type.string_id,
                                                     max_assignments=task[3],
                                                     lifetime=lifetime,
                                                     question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId, hit_type=hit_type, task_id=task[0])
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        message = {
                            "type": "ERROR",
                            "detail": "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        redis_publisher = RedisPublisher(facility='bot', users=[project.owner])
                        message = RedisMessage(json.dumps(message))
                        redis_publisher.publish_message(message)
                    return 'FAILED'
            else:
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(hit_id=mturk_hit.hit_id,
                                                                  hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self, owner_id, title, description, price, duration, boomerang_threshold, keywords=None,
                        approval_delay=None, qual_req=None,
                        qualifications_mask=0, boomerang_qual=None):
        hit_type = MTurkHITType.objects.filter(owner_id=owner_id, name=title, description=description,
                                               price=Decimal(str(price)),
                                               duration=duration,
                                               qualifications_mask=qualifications_mask,
                                               boomerang_threshold=boomerang_threshold).first()
        if hit_type is not None:
            return hit_type, True

        reward = Price(price)
        try:
            mturk_ht = self.connection.register_hit_type(title=title, description=description, reward=reward,
                                                         duration=duration, keywords=keywords,
                                                         approval_delay=approval_delay,
                                                         qual_req=qual_req)[0]
            hit_type = MTurkHITType(owner_id=owner_id, name=title, description=description,
                                    price=Decimal(str(price)),
                                    keywords=keywords, duration=duration,
                                    qualifications_mask=qualifications_mask,
                                    boomerang_qualification=boomerang_qual,
                                    boomerang_threshold=boomerang_threshold)
            hit_type.string_id = mturk_ht.HITTypeId
            hit_type.save()
        except MTurkRequestError:
            return None, False
        return hit_type, True

    def create_external_question(self, task, frame_height=800):
        task_hash = Hashids(salt=settings.SECRET_KEY, min_length=settings.ID_HASH_MIN_LENGTH)
        task_id = task_hash.encode(task)
        url = self.host + '/mturk/task/?taskId=' + task_id
        question = ExternalQuestion(external_url=url, frame_height=frame_height)
        return question

    def update_max_assignments(self, task):
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist("This task is not associated to any mturk hit")
        assignments_completed = task.task_workers.filter(~Q(status__in=[TaskWorker.STATUS_REJECTED,
                                                                        TaskWorker.STATUS_SKIPPED,
                                                                        TaskWorker.STATUS_EXPIRED])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if remaining_assignments > 0 and mturk_hit.num_assignments == mturk_hit.mturk_assignments. \
            filter(status=TaskWorker.STATUS_SUBMITTED).count() and \
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0:
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(hit_type=hit_type_id,
                                              url=self.host + '/api/mturk/notification',
                                              event_types=['AssignmentReturned', 'AssignmentAbandoned',
                                                           'AssignmentAccepted', 'AssignmentSubmitted'])

    def approve_assignment(self, task_worker):
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if hasattr(task_worker_obj, 'mturk_assignments') and task_worker_obj.mturk_assignments.first() is not None:
            try:
                self.connection.approve_assignment(task_worker_obj.mturk_assignments.first().assignment_id)
            except MTurkRequestError:
                return False
        return True

    def reject_assignment(self, task_worker):
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if hasattr(task_worker_obj, 'mturk_assignments') and task_worker_obj.mturk_assignments.first() is not None:
            try:
                self.connection.reject_assignment(task_worker_obj.mturk_assignments.first().assignment_id)
            except MTurkRequestError:
                return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id, expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id, assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError:
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self, owner_id, name, flag, description, project_id, auto_granted=False,
                                  auto_granted_value=None, deny=False, bucket=None):
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        params = {
            'origin_type': Rating.RATING_REQUESTER, 'origin_id': owner_id, 'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({'upper_bound': bucket[1] * 100, 'lower_bound': bucket[0] * 100, 'is_blacklist': True})
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        worker_ratings = [{"worker_id": r[0], "worker_username": r[1], "rating": r[2]} for
                          r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id, flag=flag, name=name).first()
        assigned_workers = []
        if qualification is None:
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(owner_id=owner_id, flag=flag, name=name,
                                                                  description=description,
                                                                  auto_granted=auto_granted,
                                                                  auto_granted_value=auto_granted_value,
                                                                  type_id=qualification_type.QualificationTypeId,
                                                                  **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            assigned_workers = MTurkWorkerQualification.objects.values('worker').filter(
                qualification=qualification).values_list('worker', flat=True)

        for rating in worker_ratings:
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                if mturk_worker_id not in assigned_workers:
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id, worker_id=mturk_worker_id,
                        value=int(rating['rating'] * 100))
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': int(rating['rating'] * 100)
                }
                MTurkWorkerQualification.objects.update_or_create(qualification=qualification,
                                                                  worker=mturk_worker_id,
                                                                  defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        try:
            result = self.connection.change_hit_type_of_hit(hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg, requester_avg):
        """
        Update boomerang for project
        Args:
            project_id:
            worker_id:
            task_avg:
            requester_avg

        Returns:
            str
        """
        hit = MTurkHIT.objects.select_related('hit_type__boomerang_qualification').filter(
            task__project__group_id=project_id).first()
        if hit is not None:
            qualification = hit.hit_type.boomerang_qualification
            worker_qual = MTurkWorkerQualification.objects.filter(qualification=qualification,
                                                                  worker=worker_id).first()
            if worker_qual is not None:
                self.update_score(worker_qual, score=int(task_avg * 100), override=True)
            else:
                MTurkWorkerQualification.objects.create(qualification=qualification, worker=worker_id,
                                                        score=int(task_avg * 100), overwritten=True)
                self.assign_qualification(qualification_type_id=qualification.type_id, worker_id=worker_id,
                                          value=int(task_avg * 100))

                # other_quals = MTurkWorkerQualification.objects.filter(~Q(qualification=qualification),
                #                                                       worker=worker_id,
                #                                                       overwritten=False)
                # for q in other_quals:
                #     self.update_score(q, score=int(requester_avg * 100))
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(worker_qual.qualification.type_id, worker_qual.worker, score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id,
                             value=1):
        """
        Assign a qualification score to a WorkerId
        Args:
            qualification_type_id:
            worker_id:
            value

        Returns:
            bool
        """
        try:
            self.connection.assign_qualification(qualification_type_id, worker_id,
                                                 value, send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        try:
            self.connection.revoke_qualification(qualification_type_id=qualification_type_id, subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
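# A hedged usage sketch for MTurkProvider above. It assumes the Django settings
# the class relies on (settings.MTURK_HOST, the boomerang constants, etc.) are
# configured; the host URL and credentials below are placeholders.
provider = MTurkProvider(host='https://daemo.example.org',
                         aws_access_key_id='AKIA...',
                         aws_secret_access_key='...')
balance, ok = provider.test_connection()
if ok:
    print 'Connected; MTurk balance: %s' % balance
else:
    print 'Could not authenticate against Mechanical Turk'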
Пример #27
0
class MTurk(object):
    """
    A class that wraps a boto.mturk.connection object and provides methods for
    the most common AI2 use cases
    """
    def __init__(self,
                 aws_access_key_id,
                 aws_secret_access_key,
                 host=SANDBOX_HOST):
        """
        initializes the instance with AWS credentials and a host
        :param aws_access_key_id the access key id.
        :param aws_secret_access_key the secret access key.
        :param host the mturk host to connect to
        """
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=host)
        self.host = host

    def __del__(self):
        """
        close the connection whenever this object goes out of scope
        """
        self.connection.close()

    def get_account_balance(self):
        """
        :return the balance on the mturk account
        """
        return self.connection.get_account_balance()[0]

    def _create_hit(self, params, **kwargs):
        """
        internal helper function for creating a HIT
        :param params the parameters (required and optional) common to all HITs
        :param **kwargs any other parameters needed for a specific HIT type
        :return the created HIT object
        """
        return self.connection.create_hit(
            title=params["title"],
            description=params["description"],
            keywords=params["keywords"],
            max_assignments=params["max_assignments"],
            reward=Price(amount=params["amount"]),
            qualifications=params["qualifications"],
            lifetime=params["lifetime"],
            # optional params below
            annotation=params.get("annotation"),
            **kwargs)

    def create_url_hit(self, params):
        """
        creates a HIT for an external question with a specified URL
        :param params a dict of the HIT parameters. must contain a "url" parameter
        :return the created HIT object
        """
        question = ExternalQuestion(params["url"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_html_hit(self, params):
        """
        creates a HIT for a question with the specified HTML
        :param params a dict of the HIT parameters, must contain a "html" parameter
        :return the created HIT object
        """
        question = HTMLQuestion(params["html"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_layout_hit(self, params):
        """
        creates a HIT for a question using the supplied layout id
        :param params a dict of the HIT parameters, must contain a "hit_layout"
               parameters with the layout id, and a "layout_params" parameter
               that's the dict of parameters to feed to the layout.
        """
        # create the LayoutParameters object from the supplied params
        layout_params = LayoutParameters([
            LayoutParameter(name, value)
            for name, value in params["layout_params"]
        ])

        return self._create_hit(params,
                                hit_layout=params["hit_layout"],
                                layout_params=layout_params)

    def delete_all_hits(self):
        """
        Permanently disables/ deletes all of the user's active HITs.
        :param mturk_connection: active mturk connection established by user in the notebook.
        :return:
        """
        my_hits = list(self.get_all_hits())
        for hit in my_hits:
            self.connection.disable_hit(hit.HITId)

    def get_assignments_object_list(self, assignment_dict):
        """
        Returns a list of "<boto.mturk.connection.Assignment object at...>" objects
        assignment_dict: a dictionary of HITId-assignment object pairs
        """
        assignments = []
        for entry in assignment_dict:
            for assignment_object in assignment_dict[entry]:
                assignments.append(assignment_object)
        return assignments

    def get_results_dict(self, HIT_assignments):
        """
        Takes a list of HIT assignment objects as input.
        Returns a list of dictionaries, one per assignment, containing:
        assignment_object: the boto Assignment object itself
        HIT_id: the HIT ID
        worker_Id: the worker ID of the Turker who completed the HIT
        answers: a dictionary of qid-answer field value pairs
        """
        assignment_results = []
        for assignment in HIT_assignments:
            HIT_dict = {}
            HIT_dict["assignment_object"] = assignment
            HIT_dict["worker_Id"] = assignment.WorkerId
            HIT_dict["HIT_id"] = assignment.HITId
            answers_dict = {}
            for answer in assignment.answers[0]:
                answers_dict[answer.qid] = answer.fields
            HIT_dict["answers"] = answers_dict
            assignment_results.append(HIT_dict)
        return assignment_results

    def get_all_results(self, hits):
        all_results = {}
        for hid, assignments in self.get_assignments(hits).items():
            all_results[hid] = self.get_results_dict(assignments)
        return all_results

    def get_reviewable_hits(self, annotations=None, detailed=False):
        """
        Get all the reviewable HITs. By default returns minimal HIT objects, but
        will return detailed ones (by necessity) if annotations is specified or
        if detailed is True
        :param annotations an optional set of annotations to retrieve HITs for
        :param detailed do you want detailed HIT objects or minimal ones
        :return a list of HIT objects
        """
        minimal_hits = []
        page_num = 1
        while True:
            more_hits = self.connection.get_reviewable_hits(
                page_size=100, page_number=page_num)
            if more_hits:
                minimal_hits.extend(more_hits)
                page_num += 1
            else:
                break

        if detailed or annotations is not None:
            detailed_hits = [
                self.connection.get_hit(hit.HITId,
                                        response_groups=('Minimal',
                                                         'HITDetail'))
                for hit in minimal_hits
            ]
            return [
                hit for hit in detailed_hits
                if annotation_filter(annotations, hit)
            ]
        else:
            return minimal_hits

    def get_all_hits(self, annotations=None):
        """
        Get all the HITs.
        :param annotations a set of annotations to get HITs for, all HITs if
               not specified
        :return a list of HIT objects
        """

        return [
            hit for hit in self.connection.get_all_hits()
            if annotation_filter(annotations, hit)
        ]

    def get_assignments(self, hits=None, hit_ids=None, status=None):
        """
        Retrieves individual assignments associated with the supplied HITs
        :param hits the HITs to get assignments for
        :param hit_ids HITIds to get assignments for (used when `hits` is None)
        :param status assignment status to filter by (e.g. 'Submitted')
        :return dict from HITId to lists of assignments
        """
        if hit_ids is None:
            hit_ids = [hit.HITId for hit in hits]
        return {
            hit_id: self.connection.get_assignments(hit_id, status=status)
            for hit_id in hit_ids
        }

    def disable_hit(self, hit=None, hit_id=None):
        """
        disable the specified hit (or the hit with the specified id). must
        specify either `hit` or `hit_id`
        :param hit a HIT object to disable
        :param hit_id a HITId to disable
        """
        hit_id = hit.HITId if hit is not None else hit_id
        return self.connection.disable_hit(hit_id)

    def approve_assignment(self,
                           assignment=None,
                           assignment_id=None,
                           feedback=None):
        """
        approve the specified assignment (or the assigment with the specified id)
        must specify either `assignment` or `assignment_id`
        :param assignment an assignment object to approve
        :param assignment_id an AssignmentId to approve
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.approve_assignment(assignment_id, feedback)

    def reject_assignment(self,
                          assignment=None,
                          assignment_id=None,
                          feedback=None):
        """
        reject the specified assignment (or the assigment with the specified id)
        must specify either `assignment` or `assignment_id`
        :param assignment an assignment object to reject
        :param assignment_id an AssignmentId to reject
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.reject_assignment(assignment_id, feedback)
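# A hedged usage sketch for the wrapper above, run against the sandbox. The
# URL, credentials and reward amount are placeholders; the parameter dict keys
# are the ones create_url_hit and _create_hit read out.
import datetime

mturk = MTurk(aws_access_key_id='AKIA...',
              aws_secret_access_key='...',
              host='mechanicalturk.sandbox.amazonaws.com')
print mturk.get_account_balance()

hit = mturk.create_url_hit({
    "url": "https://example.org/my-task",
    "frame_height": 800,
    "title": "Example external HIT",
    "description": "Illustration of the wrapper's parameter dict",
    "keywords": ["example"],
    "max_assignments": 1,
    "amount": 0.05,
    "qualifications": None,
    "lifetime": datetime.timedelta(days=3),
})
print hit[0].HITId

# later: collect whatever is reviewable
for hit_id, assignments in mturk.get_assignments(mturk.get_reviewable_hits()).items():
    print hit_id, len(assignments)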
Пример #28
0
                # print 'here'
                if answer == control_labels[question]:
                    approve = True

            # print '%s\t%s' % (question_form_answer.qid, question_form_answer.fields[0])

    if approve == False:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f.writelines(question_form_answer.qid.encode('ascii', 'ignore'))
            conn.reject_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)
    else:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f1.write(question_form_answer.qid.encode('ascii', 'ignore'))
                f1.write(question_form_answer.fields[0].encode('ascii', 'ignore') + '\n')
            conn.approve_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)

    # Uncomment to approve assignment. Approving will remove this assignment from
    # reviewable HITs, so store the data before approving.

    # Uncomment to remove all remaining assignments that have not been completed
    # and approved/rejected.
Пример #29
0
    def handle(self, *args, **options):
        # create a connection
        mturk = MTurkConnection(
            getattr(settings, 'MTURK_AWS_KEY', settings.MEDIASYNC['AWS_KEY']),
            getattr(settings, 'MTURK_AWS_SECRET',
                    settings.MEDIASYNC['AWS_SECRET']),
            host='mechanicalturk.sandbox.amazonaws.com'
            if options['sandbox'] else 'mechanicalturk.amazonaws.com')

        # if --delete, delete all the old ones first.
        if options['delete_first']:
            for hit in mturk.get_all_hits():
                mturk.disable_hit(hit.HITId)

        if options['exclude']:
            exclude_reader = csv.DictReader(open(options['exclude'], 'r'))
            exclude = set()
            for row in exclude_reader:
                exclude.add(row['td_id'])

        # iterate over items and create them one by one
        cursor = connection.cursor()
        cursor.execute(
            """
            select entity_id, type from matchbox_wikipediainfo, matchbox_entity where entity_id not in (select entity_id from matchbox_sunlightinfo where bio is not null) and bio != '' and bio is not null and entity_id = matchbox_entity.id %s order by entity_id limit %s;
            """ % (
                "and type = '%s'" % options['type'] if options['type'] else '',
                '%s'
            ),  # hack to put the interpolation string back in for PG to catch it
            [options['count']])

        for row in cursor:
            if options['exclude']:
                if str(row[0]).replace('-', '') in exclude:
                    continue

            if options['practice']:
                print row[0]
                continue

            try:
                hit = mturk.create_hit(
                    question=FakeQuestionForm(get_hit_xml(row[0])),
                    max_assignments=3,
                    annotation=row[0],
                    title="Wikipedia match validation",
                    description=
                    "We have matched a set of entities in a database to descriptions pulled from Wikipedia via an automated process. Confirm that the match is correct.",
                    reward=0.06,
                    duration=datetime.timedelta(minutes=30),
                    lifetime=datetime.timedelta(days=7),
                    keywords=['wikipedia', 'matching'],
                    approval_delay=datetime.timedelta(days=3),
                    qualifications=Qualifications([
                        PercentAssignmentsApprovedRequirement(
                            "GreaterThan", 90)
                    ]))
                print hit[0].HITId
            except Exception as e:
                sys.stderr.write("Failed to create hit %s\n" % row[0])
                sys.stderr.write(getattr(e, 'body', ''))
                sys.stderr.write('\n')
Пример #30
0
    description='Approve work from Amazon Mechanical Turk')
parser.add_argument('-r',
                    '--resultsfile',
                    required=True,
                    help='Filename for tab delimited CSV file')
parser.add_argument(
    '-s',
    '--sandbox',
    action='store_true',
    help=
    'Run the command in the Mechanical Turk Sandbox (used for testing purposes)'
)
args = parser.parse_args()

if args.sandbox:
    if not config.has_section('MTurk'):
        config.add_section('MTurk')
    config.set('MTurk', 'sandbox', 'True')
    mturk_website = 'requestersandbox.mturk.com'

results = pd.read_csv(args.resultsfile, sep='\t')

mtc = MTurkConnection(is_secure=True)

for i, hit_id in enumerate(results['hitid']):
    print("deleting hit..{0}".format(i))
    try:
        mtc.disable_hit(hit_id)
    except:
        # the HIT may already be disposed or expired; skip and move on
        continue
Пример #31
0
# -*- coding: utf-8 -*-
import os
from flask import Flask, render_template, url_for, request, make_response
from boto.mturk.connection import MTurkConnection
from boto.mturk.question import ExternalQuestion
from boto.mturk.qualification import Qualifications, PercentAssignmentsApprovedRequirement, NumberHitsApprovedRequirement
from boto.mturk.price import Price
import sys

hit = sys.argv[1]

#Start Configuration Variables
AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID']
AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY']

if len(sys.argv) > 2 and sys.argv[2] == 'pub':
    AMAZON_HOST = "mechanicalturk.amazonaws.com"
else:
    AMAZON_HOST = "mechanicalturk.sandbox.amazonaws.com"

connection = MTurkConnection(aws_access_key_id=AWS_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                             host=AMAZON_HOST)

connection.disable_hit(hit)
Пример #32
0
            question = question_form_answer.qid.replace('\n', '')
            answer = question_form_answer.fields[0]
            # print question
            if question in control_labels.keys():
                #  print 'here'
                if answer == control_labels[question]:
                    approve = True

        # print '%s\t%s'%(question_form_answer.qid, question_form_answer.fields[0])

    if approve == False:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f.writelines(question_form_answer.qid.encode(
                    'ascii', 'ignore'))
            conn.reject_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)
    else:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f1.write(question_form_answer.qid.encode('ascii', 'ignore'))
                f1.write(
                    question_form_answer.fields[0].encode('ascii', 'ignore') +
                    '\n')
            conn.approve_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)

    #Uncomment to approve assignment. Approving will remove this assignment from reviewable HITs, so store the data before approving

    #Uncomment to remove all remaining assignments that have not been completed and approved/rejected
Пример #33
0
def deleteAllHits():
    # this function should probably take an input parameter 
    # of a pickle file with the hits to be disposed...
    mtc = MTurkConnection(host=_host)
    for hit in mtc.get_all_hits():
        mtc.disable_hit( hit.HITId )
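# A hedged sketch of the variant the comment above suggests: dispose only the
# HITs recorded in a pickle file. The file layout (a plain list of HITIds) is
# an assumption, and _host comes from the surrounding module as above.
import pickle

def deleteHitsFromPickle(pickle_path):
    mtc = MTurkConnection(host=_host)
    with open(pickle_path, 'rb') as f:
        hit_ids = pickle.load(f)
    for hit_id in hit_ids:
        mtc.disable_hit(hit_id)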
Пример #34
0
                             host=HOST)

url = "https://mturk-poc.herokuapp.com/"
title = "Describe this group of people in your own words"
description = "Describe your first impressions of this group of people however you want."
keywords = ["easy"]
frame_height = 800
amount = 0.05

questionform = ExternalQuestion(url, frame_height)

all_hits = [hit for hit in connection.get_all_hits()]

if all_hits:
    for hit in all_hits:
        connection.disable_hit(hit.HITId)

create_hit_result = connection.create_hit(
    title=title,
    description=description,
    keywords=keywords,
    max_assignments=4,
    lifetime=datetime.timedelta(hours=2),
    question=questionform,
    reward=Price(amount=amount),
    response_groups=('Minimal', 'HITDetail'),
)

all_hits = [hit for hit in connection.get_all_hits()]

for hit in all_hits:
Пример #35
0
def cleanup():
    """Remove any boto test related HIT's"""

    conn = MTurkConnection(host='mechanicalturk.sandbox.amazonaws.com')
    current_page = 1
    page_size = 10
    total_disabled = 0
    ignored = []

    while True:
        # reset the total for this loop
        disabled_count = 0

        # search all the hits in the sandbox
        search_rs = conn.search_hits(page_size=page_size,
                                     page_number=current_page)

        # success?
        if search_rs.status:
            for hit in search_rs:
                # delete any with Boto in the description
                print 'hit id:%s Status:%s, desc:%s' % (
                    hit.HITId, hit.HITStatus, hit.Description)
                if hit.Description.find('Boto') != -1:
                    if hit.HITStatus != 'Reviewable':
                        print 'Disabling hit id:%s %s' % (hit.HITId,
                                                          hit.Description)
                        disable_rs = conn.disable_hit(hit.HITId)
                        if disable_rs.status:
                            disabled_count += 1
                            # update the running total
                            total_disabled += 1
                        else:
                            print 'Error when disabling, code:%s, message:%s' % (
                                disable_rs.Code, disable_rs.Message)
                    else:
                        print 'Disposing hit id:%s %s' % (hit.HITId,
                                                          hit.Description)
                        dispose_rs = conn.dispose_hit(hit.HITId)
                        if dispose_rs.status:
                            disabled_count += 1
                            # update the running total
                            total_disabled += 1
                        else:
                            print 'Error when disposing, code:%s, message:%s' % (
                                dispose_rs.Code, dispose_rs.Message)

                else:
                    if hit.HITId not in ignored:
                        print 'ignored:%s' % hit.HITId
                        ignored.append(hit.HITId)

            # any more results?
            if int(search_rs.TotalNumResults) > current_page * page_size:
                # if we have disabled any HITs on this page
                # then we don't need to go to a new page
                # otherwise we do
                if not disabled_count:
                    current_page += 1
            else:
                # no, we're done
                break
        else:
            print 'Error performing search, code:%s, message:%s' % (
                search_rs.Code, search_rs.Message)
            break

    total_ignored = len(ignored)
    print 'Processed: %d HITs, disabled/disposed: %d, ignored: %d' % (
        total_ignored + total_disabled, total_disabled, total_ignored)
Пример #36
0
class TranscriptionPipelineHandler():
    def __init__(self):
        aws_id = os.environ['AWS_ACCESS_KEY_ID']
        aws_k = os.environ['AWS_ACCESS_KEY']

        self.conn = MTurkConnection(aws_access_key_id=aws_id,\
                          aws_secret_access_key=aws_k,\
                          host=HOST)

        self.ah = AssignmentHandler(self.conn)
        self.th = TurkerHandler(self.conn)
        self.hh = HitHandler(self.conn, TEMPLATE_DIR)
        self.mh = MongoTranscriptionHandler()
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = self.conn.get_account_balance()[0].amount
        self.logger = logging.getLogger(
            "transcription_engine.transcription_pipeline_handler")

    def audio_clip_referenced_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts_by_state(
                "audio_clips", "Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts("audio_clips",
                                                {"state": "Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    def audio_clip_queue_to_hit(self, cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe"
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(
                    clip_pairs, reward_per_clip, max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove them from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(
                        clip_pairs, hit_title, question_title, description,
                        keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(
                            response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [
                            w["audio_clip_id"] for w in clip_queue
                        ]
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(
                            hit_id, hit_type_id, clip_queue, "New")
                        self.logger.info("Successfully created HIT: %s" %
                                         hit_id)
                        return self.mh.update_audio_clips_state(
                            audio_clip_ids, "Hit")
                else:
                    pass
        return False
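
    # A minimal sketch of the HIT creation that make_html_transcription_HIT is
    # assumed to wrap, using boto 2.x's HTMLQuestion; the html_form markup and
    # frame height are illustrative placeholders, not this project's template:
    #
    #     from boto.mturk.question import HTMLQuestion
    #     question = HTMLQuestion(html_form, frame_height=800)
    #     response = self.conn.create_hit(question=question,
    #                                     title=hit_title,
    #                                     description=description,
    #                                     keywords=keywords,
    #                                     reward=reward_per_clip,
    #                                     max_assignments=max_assignments,
    #                                     duration=datetime.timedelta(minutes=30),
    #                                     lifetime=datetime.timedelta(days=7))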

    def load_assignments_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)
                if self.mh.get_artifact("assignments",
                                        {"_id": assignment.AssignmentId}):
                    #We create assignments here, so if we already have it, skip
                    continue
                else:
                    have_all_assignments = False
                transcription_ids = []
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(
                    assignment)
                if transcription_dicts and len(transcription_dicts) == 10:
                    pass
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"]):
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        break
                    self.mh.update_transcription_state(transcription,
                                                       "Submitted")
                    self.mh.update_audio_clips_state(
                        [transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(
                        self.mh.get_artifact(
                            "transcriptions", {
                                "audio_clip_id":
                                transcription["audio_clip_id"],
                                "assignment_id": transcription["assignment_id"]
                            }, "_id"))
                else:
                    self.mh.create_assignment_artifact(assignment,
                                                       transcription_ids,
                                                       "Submitted")
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id, "Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s " %
                  (hit_id, assignment_ids))
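
    # A minimal sketch of pulling per-question answers off a boto assignment,
    # roughly what get_assignment_submitted_transcriptions is assumed to do
    # (the exact fields layout varies across boto versions, and treating the
    # question id as the audio clip id is an assumption here):
    #
    #     for question_form_answer in assignment.answers[0]:
    #         audio_clip_id = question_form_answer.qid
    #         transcription_text = question_form_answer.fields[0]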

    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments",
                                                     "Submitted")
        rejected_feedback = "I'm sorry, but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions had a word error rate above" +\
                            " the maximum acceptable word error rate of %s. Omitted words and" +\
                            " words that differed by more than %s characters were counted" +\
                            " as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions", "_id",
                                                   transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)

            approved, average_wer = self.filter.approve_assignment(
                transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(
                        assignment_id,
                        accepted_feedback % (assignment_id, average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment, "Approved")
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"],
                            "reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(
                                transcription, "Approved")
                    print("Approved transcription ids: %s" % transcription_ids)
            else:
                #Reject the assignment and record the denied state
                feedback = rejected_feedback % (assignment_id,
                                                self.filter.WER_THRESHOLD,
                                                self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id, feedback)
                self.mh.update_assignment_state(assignment, "Denied")
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,
                                                 (assignment_id, average_wer))
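
    # A minimal word error rate sketch (Levenshtein distance over word tokens,
    # normalized by reference length) to illustrate the criterion that
    # Filter.approve_assignment is assumed to apply; the real filter also uses
    # a character threshold (CER_THRESHOLD) when matching words:
    #
    #     def word_error_rate(reference, hypothesis):
    #         ref, hyp = reference.split(), hypothesis.split()
    #         d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    #         for i in range(len(ref) + 1):
    #             d[i][0] = i
    #         for j in range(len(hyp) + 1):
    #             d[0][j] = j
    #         for i in range(1, len(ref) + 1):
    #             for j in range(1, len(hyp) + 1):
    #                 cost = 0 if ref[i - 1] == hyp[j - 1] else 1
    #                 d[i][j] = min(d[i - 1][j] + 1,         # deletion
    #                               d[i][j - 1] + 1,         # insertion
    #                               d[i - 1][j - 1] + cost)  # substitution
    #         return float(d[len(ref)][len(hyp)]) / max(len(ref), 1)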

    def _load_rm_audio_source_file_to_clipped(
            self,
            file_dir,
            prompt_file_uri,
            base_clip_dir,
            sample_rate=16000,
            http_base_url="http://www.cis.upenn.edu/~tturpen/wavs/",
            init_clip_count=200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root, f)
                #splitext rather than str.strip, which strips characters, not suffixes
                out_uri = os.path.splitext(system_uri)[0] + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root, out_uri)
                spkr_id = str(os.path.relpath(root, file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri, out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: " +
                                          str(e))

                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.splitext(wav_filename)[0].upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(
                        out_uri, disk_space, length_seconds, sample_rate,
                        spkr_id, encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir, spkr_id,
                                                  wav_filename)
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri, audio_clip_uri)
                    #http_url
                    http_url = os.path.join(http_base_url, spkr_id,
                                            wav_filename)
                    clip_id = self.mh.create_audio_clip_artifact(
                        source_id, 0, -1, audio_clip_uri, http_url,
                        length_seconds, disk_space)

                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id, clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(
                        clip_id, transcription_prompt, "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(
                        clip_id, transcription_id)
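
    # A minimal sketch of the sph-to-wav conversion that WavHandler.sph_to_wav
    # is assumed to perform; shelling out to sox (which reads NIST SPHERE
    # files) is an assumption, not this project's actual implementation:
    #
    #     import subprocess
    #     subprocess.check_call(["sox", system_uri, out_uri,
    #                            "rate", str(sample_rate)])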

    def all_workers_liveness(self):
        workers = self.mh.get_all_workers()
        for worker in workers:
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)" %
                  (worker["_id"], approved, denied))
            selection = input(
                "1. Show denied transcriptions and references.\n" +
                "2. Show accepted transcriptions and references.\n" +
                "3. Show both denied and accepted transcriptions.")
            if selection == 1 or selection == 3:
                print("Denied transcriptions")
                for assignment_id in denied:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))
            if selection == 2 or selection == 3:
                print("Approved transcriptions")
                for assignment_id in approved:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))

    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved:
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer / len(approved)
                print("%s,%s" % (len(approved), worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment / total_accepted
        print("Average WER per assignment(%s)" % (av))

    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""
        approved_assignments = self.mh.get_artifacts_by_state(
            "assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                      "%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(
                    assignment["SubmitTime"], "%Y-%m-%dT%H:%M:%SZ")
            else:
                #Skip assignments without timing information rather than
                #reusing times from a previous iteration
                continue
            total += submitted - accepted
            count += 1
        seconds_per_assignment = total.total_seconds() / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))
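
    # Worked example of the formula above: with a mean completion time of
    # 120 seconds and a reward of $0.20 per assignment, the effective hourly
    # wage is 60.0 * 60.0 / 120 * 0.20 = $6.00 per hour.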

    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        for hit in hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)"
                             ) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        clips = self.mh.get_artifact("transcription_hits",
                                                     {"_id": hit_id}, "clips")
                        self.mh.remove_transcription_hit(hit_id)
                        self.mh.update_audio_clips_state(clips, "Referenced")
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)" %
                             len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e

    def run(self):
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        selection = 0
        init_clip_count = 10000
        while selection != "11":
            selection = raw_input(
                """Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """ % init_clip_count)
            #selection = "5"
            if selection == "1":
                self._load_rm_audio_source_file_to_clipped(
                    audio_file_dir,
                    prompt_file_uri,
                    base_clip_dir,
                    init_clip_count=init_clip_count)
            elif selection == "2":
                self.audio_clip_referenced_to_hit()
            elif selection == "3":
                self.load_assignments_hit_to_submitted()
            elif selection == "4":
                self.assignment_submitted_approved()
            elif selection == "5":
                self.allhits_liveness()
            elif selection == "6":
                self.all_workers_liveness()
            elif selection == "7":
                print("Account balance: %s" % self.balance)
            elif selection == "8":
                self.stats()
            elif selection == "9":
                self.recalculate_worker_assignment_wer()
            elif selection == "10":
                self.get_assignment_stats()
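
# A minimal usage sketch (not part of the original snippet), assuming HOST,
# TEMPLATE_DIR and the AWS credential environment variables read in __init__
# point at the MTurk sandbox:
#
#     if __name__ == "__main__":
#         handler = TranscriptionPipelineHandler()
#         handler.run()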


#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)

#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))