Exemplo n.º 1
0
def finalize_assignment(assignment_id, approve=True):
  """Approve or reject one MTurk assignment.

  A connection is opened with the module-level MTURK_ACCESS_KEY,
  MTURK_SECRET_KEY and MTURK_HOST settings.  When ``approve`` is truthy the
  assignment is approved, otherwise it is rejected.
  """
  conn = MTurkConnection(
    aws_access_key_id=MTURK_ACCESS_KEY,
    aws_secret_access_key=MTURK_SECRET_KEY,
    host=MTURK_HOST)

  # Pick the bound method first, then apply it to the assignment.
  decide = conn.approve_assignment if approve else conn.reject_assignment
  decide(assignment_id)
Exemplo n.º 2
0
def main():
    """Interactively approve the submitted assignments of one HIT and pay bonuses.

    Command-line arguments: ``keys_file``, ``label_count_file``, ``hit_id`` and
    the optional ``--production`` flag.  ``label_count_file`` is executed as
    Python code and must define ``get_label_count(assignment)``.

    For every assignment whose status is ``'Submitted'`` the label count is
    validated (it must be present and lie within 0..100), the operator is
    prompted, and on an empty answer the assignment is approved and a bonus of
    $0.01 per label is granted (no bonus is sent for a count of 0).
    """
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('keys_file')
    parser.add_argument('label_count_file')
    parser.add_argument('hit_id')
    parser.add_argument('--production', default=False, action='store_true')
    args = parser.parse_args()

    access_key_id, secret_key, host = mturk.get_keys_and_host(args.keys_file, args.production)

    connection = MTurkConnection(aws_access_key_id=access_key_id,
                                 aws_secret_access_key=secret_key,
                                 host=host)

    # NOTE(review): execfile runs whatever code the file contains with this
    # process's privileges; only pass files you trust.
    label_count_dict = {}
    execfile(args.label_count_file, label_count_dict)
    get_label_count = label_count_dict['get_label_count']

    for assignment in connection.get_assignments(args.hit_id):
        if assignment.AssignmentStatus != 'Submitted':
            continue

        label_count = get_label_count(assignment)

        # Identity test: None is a singleton and must not go through __eq__.
        if label_count is None:
            print('label-count not found or not integer, skipping')
            continue

        if label_count < 0:
            print('label-count below 0, skipping')
            continue

        if label_count > 100:
            print('label-count too large, skipping')
            continue

        bonus_amount = label_count * 0.01

        # Single-argument print(...) behaves the same as the print statement.
        print('')
        print('approving label-count of %d with bonus of $%0.2f' % (label_count, bonus_amount))
        okay = raw_input('okay? ')

        # Only a bare <Enter> confirms; any typed text skips the assignment.
        if okay == '':
            connection.approve_assignment(assignment.AssignmentId, 'Thank you!')
            if label_count != 0:
                connection.grant_bonus(assignment.WorkerId, assignment.AssignmentId, Price(bonus_amount), 'For rating %d images.' % (label_count))

        else:
            print('skipping')
Exemplo n.º 3
0
	def getAllHits(self, hits):
		"""Print, approve and retire every assignment of the given HITs.

		For each entry of ``hits`` the assignments are fetched, each worker's
		answers are printed as ``key: value`` lines, the assignment is
		approved, and finally the HIT itself is disabled.
		"""
		connection = MTurkConnection(
			aws_access_key_id=self.ACCESS_ID,
			aws_secret_access_key=self.SECRET_KEY,
			host=self.HOST)
		for hit in hits:
			for submission in connection.get_assignments(hit):
				print("Answers of the worker %s" % submission.WorkerId)
				for answer in submission.answers[0]:
					for key, value in answer.fields:
						print("%s: %s" % (key, value))
				connection.approve_assignment(submission.AssignmentId)
				print("--------------------")
			# Disable only after all of this HIT's assignments were handled.
			connection.disable_hit(hit)
def get_final_score(HITId):
    """Approve all assignments of a HIT, disable it, and return its weighted score.

    Answers whose question id is ``'design'`` are summed as integer ratings;
    every other answer is passed to ``answer_key`` and summed as accuracy.
    Both sums are averaged over the number of ``'design'`` answers and
    combined as ``0.70 * accuracy_average + 0.30 * rating_average``.

    Args:
        HITId: id of the HIT to score; it must be among the account's HITs.

    Returns:
        The weighted average as a float, or ``0.0`` when the HIT produced no
        ``'design'`` answers (there is nothing to average over).

    Raises:
        KeyError: if ``HITId`` is not among the HITs returned by MTurk.
    """
    mtc = MTurkConnection(aws_access_key_id=ACCESS_ID,
                          aws_secret_access_key=SECRET_KEY,
                          host=HOST)

    # Index the account's HITs by id; an unknown id fails here with KeyError
    # before anything is approved.
    hits_dict = dict((hit.HITId, hit) for hit in mtc.get_all_hits())
    curr_hit = hits_dict[HITId]

    sum_opin = 0
    sum_acc = 0
    index = 0  # number of 'design' (rating) answers seen
    for assignment in mtc.get_assignments(curr_hit.HITId):
        for question_form_answer in assignment.answers[0]:
            for key in question_form_answer.fields:
                if question_form_answer.qid == 'design':
                    index += 1
                    sum_opin += int(key)
                else:
                    sum_acc += answer_key(key)
        mtc.approve_assignment(assignment.AssignmentId)
    mtc.disable_hit(curr_hit.HITId)

    # Without any rating answers both averages are undefined; report a zero
    # score instead of dividing by zero.
    if index == 0:
        return 0.0

    avg_ratings = float(sum_opin) / index
    avg_legib_score = float(sum_acc) / index

    # Weighted average: 70% legibility accuracy, 30% ratings.
    # NOTE(review): the rating average enters unscaled; confirm that it is on
    # the same scale as the values returned by answer_key() and that 70/30 is
    # the intended weighting.
    return 0.70 * avg_legib_score + 0.30 * avg_ratings
def payTurkersAssignments():
    """Approve or reject every processed assignment and report the outcome.

    ``processAssignments(save=False)`` supplies four lists.  Entries of
    ``_good_assignments``, ``_flagged_assignments`` and ``_error_assignments``
    are approved; entries of ``_rejected_assignments`` are rejected.  A
    request that raises ``MTurkRequestError`` is counted as failed and the
    entry is collected.

    Returns:
        tuple: ``(failed_approved_list, failed_rejected_list)``.
    """
    connection = MTurkConnection( host = _host )

    failed_approved_list = []
    failed_rejected_list = []

    outcome = processAssignments( save=False )

    def settle(bucket, action, failures):
        # Apply `action` to each entry of one result list and return how many
        # calls succeeded; entries whose call failed are appended to `failures`.
        done = 0
        for entry in outcome[bucket]:
            try:
                action( entry['assignment_id'] )
            except MTurkRequestError:
                failures.append( entry )
            else:
                done += 1
        return done

    # Not rejected nor flagged, flagged by the turkers, and assignments where
    # something unexpected happened on our side: all of these get approved.
    approved = 0
    for bucket in ('_good_assignments', '_flagged_assignments', '_error_assignments'):
        approved += settle( bucket, connection.approve_assignment, failed_approved_list )

    # Assignments that were rejected.
    rejected = settle( '_rejected_assignments', connection.reject_assignment, failed_rejected_list )

    print("Approved:        [%d]" % approved)
    print("Rejected:        [%d]" % rejected)
    print("Not Approved:    [%d]" % len(failed_approved_list))
    print("Not Rejected:    [%d]" % len(failed_rejected_list))

    return (failed_approved_list, failed_rejected_list)
Exemplo n.º 6
0
class TurkerResults:


    # TODO: this should be moved to DB 
    def get_tasklet_from_hit(self, hitid):
        self.cur.execute("""
        select task_id 
        from tasklet_session_log 
        where assignment_id not like 'picked_%%' and 
        worker_id not like 'internal_%%' 
        and hit_id = %s; """,[hitid])
        return self.cur.fetchall()

    def get_tasklet_kw(self, tid):
        self.cur.execute("""
        select keywords
        from tasklets
        where tasklets.id = %s
        """, [tid])
        return self.cur.fetchone()[0]

    def __init__(self):
        """Load ``../config.ini`` and open the MTurk and database handles.

        The ``mturk`` section supplies ``host``, ``access_key_id`` and
        ``secret_access_key`` for the MTurk connection.  The instance also
        gets a ``HaCRSDB``, a connection/cursor pair from
        ``HaCRSUtil.get_db`` and a ``HaCRSTurker``.
        """
        cfg = HaCRSUtil.get_config('../config.ini')
        self.config = cfg

        mturk_host = cfg.get('mturk','host')
        key_id = cfg.get('mturk', 'access_key_id')
        secret_key = cfg.get('mturk', 'secret_access_key')
        self.MTconnection = MTurkConnection(
            aws_access_key_id=key_id,
            aws_secret_access_key=secret_key,
            host=mturk_host)

        self.db = HaCRSDB()
        self.con, self.cur = HaCRSUtil.get_db(cfg)
        self.mt = HaCRSTurker()

    def assignment_payout(self, assignments, amount):
        paysum = 0
        for assignment in assignments:
            if assignment.AssignmentStatus == 'Approved':
                paysum += amount
            else:
                pdb.set_trace()
                pass
        return paysum

    def get_paid_bonus(self, bonuses, assignmentid, workerid):
        for bonus in bonuses:
            if bonus['aid'] == assignmentid and bonus['wid'] == workerid :
                return bonus['price']
        return 0

    def get_all_hits(self):
        all_hits = [hit for hit in self.MTconnection.get_all_hits()]
        totassignments = 0
        maxtotalspent = 0
        for hit in all_hits:
            assignments = self.MTconnection.get_assignments(hit.HITId)
            assignmentpay = self.assignment_payout(assignments, float(hit.Amount))
            maxtotalspent += assignmentpay
            #print "{} - {} - {} - Expired: {} Keywords: {} #Assignments: {}".format(hit.CreationTime, hit.HITStatus, hit.HITReviewStatus, hit.expired, hit.Keywords, len(assignments))
            totassignments += len(assignments)
        #print ''
        #print "Total: #HIT: {}, #Assignments: {} TotalMaxSpent: {}".format(len(all_hits), totassignments, maxtotalspent)

    def log_worker(self, worker_base, worker_bonus, worker_solves, difficulty_solves, assignment, bonuses, hit):
        wid = assignment.WorkerId
        if wid not in worker_base.keys():
            worker_base[wid] = 0.0
            worker_solves[wid] = {}
        wbonus = self.get_paid_bonus(bonuses, assignment.AssignmentId, wid)
        worker_base[wid] += float(hit.Amount)

        if wbonus > 0:
            if wid not in worker_bonus.keys():
                worker_bonus[wid] = 0.0
            worker_bonus[wid] += wbonus
        if hit.Keywords not in worker_solves[wid].keys():
            worker_solves[wid][hit.Keywords] = 0
        worker_solves[wid][hit.Keywords] += 1

        if hit.Keywords not in difficulty_solves.keys():
            difficulty_solves[hit.Keywords] = 0
        difficulty_solves[hit.Keywords] += 1

    def get_all_spendings_by_worker(self):
        all_hits = [hit for hit in self.MTconnection.get_all_hits()]
        maxtotalspent = 0
        bonuses = json.load(open('bonus_paid.json'))
        worker_base = {}
        worker_bonus = {}
        worker_solves = {}
        difficulty_solves = {}
        for hit in all_hits:
            assignments = self.MTconnection.get_assignments(hit.HITId)
            for assignment in assignments:
                if assignment.AssignmentStatus == 'Approved':
                    self.log_worker(worker_base, worker_bonus, worker_solves, difficulty_solves, assignment, bonuses, hit)
        #print "Worker Base:"
        #print json.dumps(worker_base, sort_keys=True, indent=4, separators=(',', ': ') )
        #print "Worker Bonus:"
        #print json.dumps(worker_bonus, sort_keys=True, indent=4, separators=(',', ': ') )
        #print "Worker Solves:"
        #print json.dumps(worker_solves, sort_keys=True, indent=4, separators=(',', ': ') )
        #print "By Difficulty:"
        #print json.dumps(difficulty_solves, sort_keys=True, indent=4, separators=(',', ': ') )
        return



    def test_seek_tasklet(self, tid, program ):
        fseek = '{}/{}/{}-seek.json'.format(self.config.get('general', 'resultsfolder'), program, tid)
        if os.path.exists(fseek):
            return json.load(open(fseek))['triggered']
        else:
            return None

    def approve_reject(self, taskid_earnings):
        """Approve submitted assignments whose payout reached the base amount.

        Walks every HIT whose ``CreationTime`` starts with EXPERIMENT_START,
        skipping HITs without completed assignments or without logged
        tasklets.  For each (tasklet, assignment) pair of a HIT that is still
        ``'NotReviewed'``, a ``'Submitted'`` assignment is approved when
        ``taskid_earnings[tid][worker]['payout'] >= ['amount']``.  Rejection
        of under-performing assignments is currently disabled.

        Args:
            taskid_earnings: mapping tasklet id -> worker id -> dict with
                ``'payout'`` and ``'amount'`` entries.

        Returns:
            None.  The per-worker tallies are only kept locally.
        """
        global EXPERIMENT_START
        all_hits = [hit for hit in self.MTconnection.get_all_hits()]
        worker_solvecount = {}        # worker id -> summed base amount
        worker_solvedifficulty = {}   # worker id -> approved count per keyword
        solved = 0

        # Keys "<hit>/<assignment>/<worker>" already counted as approved, so
        # an assignment is tallied once even if the HIT maps to several tasklets.
        tasklet_hit_done = set()
        empty = { 'easy': 0, 'medium': 0, 'hard': 0, 'very_hard': 0, 'priority': 0 }

        for hit in all_hits:
            if not hit.CreationTime.startswith(EXPERIMENT_START):
                #print 'old hit!'
                continue
            if hit.NumberOfAssignmentsCompleted == 0:
                continue
            tasklet_ids = self.get_tasklet_from_hit(hit.HITId)
            if len(tasklet_ids) == 0:
                continue

            for line in tasklet_ids:
                tid = str(line[0])
                tasklet = self.db.get_full_tasklet(tid)
                assignments = self.MTconnection.get_assignments(hit.HITId)
                for assignment in assignments:

                    if assignment.WorkerId not in worker_solvecount.keys():
                        worker_solvecount[assignment.WorkerId] = 0
                        worker_solvedifficulty[assignment.WorkerId] = copy.deepcopy(empty)

                    # Only SEED tasklets are expected here; anything else aborts.
                    if tasklet['type'] != 'SEED':
                        assert False, 'Wrong tasklet type!'

                    #print 'hit: {} {}'.format(hit.HITStatus, hit.HITReviewStatus)
                    if assignment.AssignmentStatus == 'Approved':
                        tkey = "{}/{}/{}".format(hit.HITId, assignment.AssignmentId, assignment.WorkerId)
                        if not tkey in tasklet_hit_done:
                            worker_solvedifficulty[assignment.WorkerId][tasklet['keywords']] += 1
                            tasklet_hit_done.add(tkey)
                            #print 'Approved: {}'.format(tkey)

                    if hit.HITReviewStatus == 'NotReviewed':
                        # Anything no longer 'Submitted' has been decided
                        # already; count it and move on without paying.
                        if not assignment.AssignmentStatus == 'Submitted':
                            solved += 1
                            continue

                        # No earnings recorded for this tasklet/worker pair:
                        # leave the assignment untouched.
                        try:
                            money = taskid_earnings[tid][assignment.WorkerId]
                        except Exception as e:
                            #print '{} error'.format(tasklet['type'])
                            continue

                        if money['payout'] < money['amount']:
                            pass
                            #print 'Possible reject: {}'.format(money['payout'])
                            # TODO - uncomment this to actually reject a task
                            #self.MTconnection.reject_assignment(assignment.AssignmentId)
                        if money['payout'] >= money['amount']:
                            self.MTconnection.approve_assignment(assignment.AssignmentId, feedback = "Thanks for participating, more similar tasks coming soon")
                            pass
                    else:
                        #print 'else: {}'.format(hit.HITReviewStatus)
                        pass


                    # Reached only by assignments that were not skipped above.
                    worker_solvecount[assignment.WorkerId]+= float(hit.Amount)
        #pprint (sorted(worker_solvecount.items(), key=operator.itemgetter(1)))
        #print "worker_solvecount"
        #print json.dumps(worker_solvecount, sort_keys=True, indent=4, separators=(',', ': ') )
        #print "worker_solvedifficulty"
        #print json.dumps(worker_solvedifficulty, sort_keys=True, indent=4, separators=(',', ': ') )
        #print "Solved: {}, total payout: {}".format(solved, sum(worker_solvecount.values()))

    def split_composite_key(self, k):
        # "{}-{}-{}-{}".format(taskid, hitid, assignmentid, workerid)
        tid = k[:36]
        hitid, aid, workerid = k[37:].split('-')
        assert len(hitid) == 30
        assert len(aid) == 30
        assert len(workerid) in [11, 12, 13, 14]
        return {'tid': tid, 'hitid': hitid, 'aid': aid, 'workerid': workerid}

    def get_seed_stats(self, seed_taskletid_solved):
        """Aggregate SEED payout and solve statistics, then drop into pdb.

        For every SEED tasklet whose timestamp starts with EXPERIMENT_START
        (programs ``seed_training`` and ``A_Game_of_Chance`` excluded) the
        matching ``result.json`` files under the results folder are read and
        payouts, bonuses, solve counts and maximum coverage are accumulated
        in local variables.  Nothing is returned: the method ends in
        ``pdb.set_trace()`` so the locals can be inspected interactively.

        Args:
            seed_taskletid_solved: container of tasklet ids counted as
                solved in ``program_solves``.
        """
        unique_seed_workers = set()
        worker_payouts_base = {}
        worker_payouts_bonus = {}
        worker_payouts_combined = {}
        worker_solves = {}
        tasklet_solved = set()

        total_payout_base = 0
        total_payout_bonus = 0
        total_payout = 0

        tasklet_difficulty = {}
        program_solves = {}
        # NOTE(review): this name was never bound inside the method; a local
        # dict is assumed to be what was intended - confirm that no
        # module-level prog_maxcoverage is expected to be filled here.
        prog_maxcoverage = {}

        with open(self.config.get('general', 'programsjson')) as programs_file:
            for program in json.load(programs_file):
                prog_maxcoverage[program] = 0
                program_solves.setdefault(program, 0)
        program = None

        resultsfolder = self.config.get('general', 'resultsfolder')

        for tasklet in self.db.get_seed_tasklets():

            if not str(tasklet['timestamp']).startswith(EXPERIMENT_START):
                continue

            if tasklet['program'] in ['seed_training', 'A_Game_of_Chance']:
                continue

            if tasklet['id'] in seed_taskletid_solved:
                program_solves[tasklet['program']] = program_solves.get(tasklet['program'], 0) + 1

            pattern = '{}/{}/{}*/*.json'.format(resultsfolder, tasklet['program'], str(tasklet['id']))
            for jfile in glob.glob(pattern):
                path_parts = jfile.split(os.path.sep)
                try:
                    metadata = self.split_composite_key(path_parts[path_parts.index('result.json')-1])
                except (AssertionError, ValueError):
                    # Not a result.json, or a directory with a fake key.
                    continue
                # that's us
                if metadata['workerid'] == 'A2PRAI0ABXN99X':
                    continue

                # The last line of the file holds the final result record.
                with open(jfile) as result_file:
                    results = json.loads(result_file.readlines()[-1])
                tasklet = self.db.get_full_tasklet(metadata['tid'])
                if tasklet is None:
                    continue

                payout = HaCRSUtil.get_current_payout(tasklet['payout_arr'], results['new_transitions'])
                prog_maxcoverage[tasklet['program']] = max(prog_maxcoverage.get(tasklet['program'], 0), results['coverage'])
                if payout > 0:
                    workerid = metadata['workerid']
                    worker_solves[workerid] = worker_solves.get(workerid, 0) + 1
                    unique_seed_workers.add(workerid)
                    tasklet_difficulty[tasklet['keywords']] = tasklet_difficulty.get(tasklet['keywords'], 0) + 1
                    tasklet_solved.add(tasklet['id'])
                    if workerid not in worker_payouts_base:
                        worker_payouts_base[workerid] = []
                        worker_payouts_bonus[workerid] = []
                        worker_payouts_combined[workerid] = []
                    worker_payouts_base[workerid].append(tasklet['amount'])
                    total_payout_base += tasklet['amount']
                    if payout > tasklet['amount']:
                        # Count each bonus exactly once in the running total.
                        bonus = round(payout - tasklet['amount'], 2)
                        total_payout_bonus += bonus
                        worker_payouts_bonus[workerid].append(bonus)
                    worker_payouts_combined[workerid].append(round(payout, 2))
                total_payout += payout

        #print 'Total seed BASE payment: $ {}'.format(total_payout_base)
        #print 'Total seed BONUS payment: $ {}'.format(total_payout_bonus)
        #print 'Workers solving at least one SEED Task: {}'.format(len(unique_seed_workers))
        #print 'Number of solved SEED tasks: {}'.format(len(tasklet_solved))
        #print 'Number of tasklets by difficulty: {}'.format(tasklet_difficulty)
        # Deliberate breakpoint: the aggregates above are inspected by hand.
        pdb.set_trace()

    def get_solve_ratio(self):
        """Compute per-tasklet, per-worker earnings from the result files on disk.

        For every SEED and SEEK tasklet whose timestamp starts with
        EXPERIMENT_START the matching ``result.json`` files are read.  SEED
        results are paid via ``HaCRSUtil.get_current_payout`` and update the
        program's maximum coverage; SEEK and DRILL tasklets pay their fixed
        amount.  Results for any other tasklet type are skipped because no
        payout rule exists for them.  The earnings mapping is pretty-printed
        before returning.

        Returns:
            tuple: ``(taskid_earnings, prog_maxcoverage, seed_taskletid_solved)``
            where ``taskid_earnings[tid][workerid]`` is a dict with
            ``'payout'`` and ``'amount'``, ``prog_maxcoverage`` maps program
            to the highest coverage seen, and ``seed_taskletid_solved`` maps
            solved SEED tasklet ids to True.
        """
        prog_maxcoverage = {}
        taskid_earnings = {}
        seed_taskletid_solved = {}

        with open(self.config.get('general', 'programsjson')) as programs_file:
            for program in json.load(programs_file):
                prog_maxcoverage[program] = 0

        resultsfolder = self.config.get('general', 'resultsfolder')

        for tasklet in self.db.get_seed_tasklets() + self.db.get_seek_tasklets():
            if not str(tasklet['timestamp']).startswith(EXPERIMENT_START):
                continue

            pattern = '{}/{}/{}*/*.json'.format(resultsfolder, tasklet['program'], str(tasklet['id']))
            for jfile in glob.glob(pattern):
                path_parts = jfile.split(os.path.sep)
                try:
                    metadata = self.split_composite_key(path_parts[path_parts.index('result.json')-1])
                except (AssertionError, ValueError):
                    # Not a result.json, or a directory with a fake key.
                    continue
                # that's us
                if metadata['workerid'] == 'A2PRAI0ABXN99X':
                    continue

                # The last line of the file holds the final result record.
                with open(jfile) as result_file:
                    results = json.loads(result_file.readlines()[-1])

                full_tasklet = self.db.get_full_tasklet(metadata['tid'])
                if full_tasklet is None:
                    continue

                tasklet_type = full_tasklet['type']
                if tasklet_type == 'SEED':
                    payout = HaCRSUtil.get_current_payout(full_tasklet['payout_arr'], results['new_transitions'])
                    program = full_tasklet['program']
                    prog_maxcoverage[program] = max(prog_maxcoverage.get(program, 0), results['coverage'])
                elif tasklet_type in ('SEEK', 'DRILL'):
                    payout = full_tasklet['amount']
                else:
                    # Never fall through with an unbound payout or one left
                    # over from the previous result file.
                    continue

                # Goal reached (or over-achieved) on a SEED tasklet.
                if tasklet_type == 'SEED' and full_tasklet['amount'] <= payout:
                    for hit in self.db.get_hit_for_tasklet(full_tasklet['id']):
                        # NOTE(review): the returned assignment is unused; the
                        # call is kept in case it has side effects - confirm.
                        self.mt.get_assignment_from_hit(hit)
                        seed_taskletid_solved[full_tasklet['id']] = True

                per_worker = taskid_earnings.setdefault(metadata['tid'], {})
                per_worker[metadata['workerid']] = {'payout': payout, 'amount': full_tasklet['amount']}

        pprint(taskid_earnings)

        return taskid_earnings, prog_maxcoverage, seed_taskletid_solved

    def log_bonus(self, tid, wid, aid, price):
        self.bonuses.append({'tid': tid, 'wid': wid, 'aid': aid, 'price': price})
        json.dump(self.bonuses, open('bonus_paid.json', 'w'), sort_keys=True, indent=4, separators=(',', ': ') )

    def do_pay_bonus(self, tid, wid, aid, price):
        assert len(tid) > 5, 'tasklet id mismatch'
        reason = "We issued a bonus for reaching a stretch goal of our task - Thanks!"
        assert price < 5
        self.log_bonus(tid, wid, aid, price)
        try:
            self.MTconnection.grant_bonus(wid, aid, Price(price), reason)
            return True
        except Exception as e:
            #print "Not issued for whatever reason: {}".format(e)
            return False

    def bonus_paid_before(self, tid, wid, aid):
        for bonus in self.bonuses:
            if bonus['tid'] == tid and bonus['aid'] == aid and bonus['wid'] == wid:
                return True
        return False

    def check_bonus(self, taskid_earnings):
        """Dry-run the bonus computation for every result directory on disk.

        Loads ``bonus_paid.json`` into ``self.bonuses``, then walks
        ``<resultsfolder>/<program>/*/result.json`` for each configured
        program.  For SEED tasklets whose recorded payout exceeds the base
        amount, the bonus is added to a per-worker running total.  The actual
        payment call is disabled, so nothing is paid and None is returned.

        Args:
            taskid_earnings: mapping tasklet id -> worker id -> dict with
                ``'payout'`` and ``'amount'`` entries.
        """
        with open('bonus_paid.json') as bonus_file:
            self.bonuses = json.load(bonus_file)
        worker_bonus = {}

        with open(self.config.get('general', 'programsjson')) as programs_file:
            programs = json.load(programs_file)
        resultsfolder = self.config.get('general', 'resultsfolder')

        for program in programs:
            for jfile in glob.glob('{}/{}/*/result.json'.format(resultsfolder, program)):
                path_parts = jfile.split(os.path.sep)
                xkey = path_parts[path_parts.index('result.json')-1]
                if xkey.endswith(('-OLD', '-internal_zardus')):
                    continue
                try:
                    metadata = self.split_composite_key(xkey)
                except (AssertionError, ValueError):
                    # Directory name is not a real composite key.
                    continue
                if metadata['workerid'] == 'A2PRAI0ABXN99X':
                    continue

                tasklet = self.db.get_full_tasklet(metadata['tid'])
                if tasklet is None:
                    continue
                try:
                    money = taskid_earnings[str(tasklet['id'])][metadata['workerid']]
                except KeyError:
                    # No earnings recorded for this tasklet/worker pair.
                    continue

                if tasklet['type'] != 'SEED':
                    # We only pay a bonus for SEEDing.
                    continue

                if money['payout'] > money['amount']:
                    bonus = round(money['payout'] - money['amount'], 2)
                    workerid = metadata['workerid']
                    # Plain accumulation: new total = old total + this bonus.
                    worker_bonus[workerid] = round(worker_bonus.get(workerid, 0) + bonus, 2)

                    if not self.bonus_paid_before(str(tasklet['id']), workerid, metadata['aid']):
                        # TODO: payout is disabled; enable to actually pay:
                        #if self.do_pay_bonus(str(tasklet['id']), workerid, metadata['aid'], bonus):
                        #    total_bonus_issued += bonus
                        pass
        #print json.dumps(worker_bonus, sort_keys=True, indent=4, separators=(',', ': ') )

    def show_medium_hard(self, taskid_earnings):
        for tasklet in taskid_earnings:
            #tasklet = self.db.get_full_tasklet(tasklet)
            kw = self.get_tasklet_kw(tasklet)
            kwkey = ['easy', 'medium', 'hard', 'very_hard', 'priority'].index(kw)
            if len(filter(lambda x: x['payout'] >= x['amount'], taskid_earnings[tasklet].values())) > 0:
                pass
            #print '{} [{}] {}: {}'.format(self.db.get_tasklet_program(tasklet), kwkey, kw, tasklet)
        sys.exit(1)

    def approve_single(self, hitid):
        """Approve the first (previously rejected) assignment of one HIT by hand.

        Fetches the HIT and its assignments, stops in pdb so the operator can
        inspect ``xhit`` and ``assignments``, and on continuing calls
        ``approve_rejected_assignment`` for ``assignments[0]``.
        """
        xhit = self.MTconnection.get_hit(hitid)
        assignments = self.MTconnection.get_assignments(xhit[0].HITId)
        #print 'Verify data before proceeding'
        # Deliberate breakpoint: continuing from here triggers the approval.
        pdb.set_trace()
        rc = self.MTconnection.approve_rejected_assignment(assignments[0].AssignmentId, feedback = "Thanks for participating, more similar tasks coming soon")


    def trace_hit(self, hitid):
        """Fetch one HIT and stop in pdb so it can be inspected as ``xhit``."""
        xhit = self.MTconnection.get_hit(hitid)
        # Deliberate breakpoint: this method exists for interactive debugging.
        pdb.set_trace()
        pass
Exemplo n.º 7
0
                    print "Assign ID is %s " % assign.AssignmentId
                    for question_form_answer in assign.answers[0]:
                        print question_form_answer.fields
                        print "-------------------"

        elif MainInput == 3:
            print "============Approve Workers============"
            print "What would you like do?"
            print "(1) Approve worker by HITId"
            print "(2) Approve all submissions"
            choice = int(sys.stdin.readline())

            if choice == 1:
                print "Input Assignment ID of the HIT you want to approve"
                inputassign = sys.stdin.readline()
                mtc.approve_assignment(inputassign)
                for hit in hits:
                    assignment = mtc.get_assignments(hit.HITId)
                    for assign in assignment:
                        if inputassign == assign.AssignmentId:
                            mtc.disable_hit(hit.HITId)
            elif choice == 2:
                hits = show_results(mtc)
                # for temp in hits:
                # 	mtc.disable_hit(temp.HITId)
                for hit in hits:
                    assignment = mtc.get_assignments(hit.HITId)
                    for assign in assignment:
                        mtc.approve_assignment(assign.AssignmentId)
                    mtc.disable_hit(hit.HITId)
                    # print 'Top HIT sumbission has been approved.'
Exemplo n.º 8
0
	def approveHit(self, assignment_id):
		"""Approve one assignment, sending no feedback to the worker.

		A fresh connection is opened with the instance's ACCESS_ID,
		SECRET_KEY and HOST for the single call.
		"""
		credentials = dict(
			aws_access_key_id=self.ACCESS_ID,
			aws_secret_access_key=self.SECRET_KEY,
			host=self.HOST)
		connection = MTurkConnection(**credentials)
		connection.approve_assignment(assignment_id, feedback=None)
Exemplo n.º 9
0
class MTurkProvider(object):
    """Publishes project tasks as Mechanical Turk HITs through an ``MTurkConnection``.

    Also manages the HIT types, qualification types and worker qualification
    scores that those HITs rely on.
    """
    # Description registered with every HIT type built by create_hits.
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    # Keywords registered with every HIT type built by create_hits.
    keywords = ['daemo']
    # NOTE(review): ``countries`` and ``min_hits`` are not read by any method
    # of this class - presumably consumed elsewhere; confirm.
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST)
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in [
                    'location', 'approval_rate', 'total_tasks'
            ]:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [
                    val.strip() for val in item.expression['value']
                    if val is not None and val != ''
                ])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(
                    op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(
                    op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        """Build the qualification requirements for a project's HIT type.

        The project's own qualification items are translated first. Then,
        depending on the threshold, either waitlist (deny) qualifications or
        a minimum-score requirement on the project's boomerang qualification
        is added.

        Args:
            project: project providing ``qualification``, ``owner_id`` and
                ``group_id``.
            boomerang_threshold: minimum rating, scaled by 100.
            add_boomerang: when false, the boomerang/waitlist qualification
                types are still created but no requirement is attached.

        Returns:
            tuple ``(Qualifications, boomerang_qual)``; ``boomerang_qual`` is
            ``None`` when the boomerang qualification type could not be
            created.
        """
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(
                project.qualification)
        boomerang_qual, success = self.create_qualification_type(
            owner_id=project.owner_id,
            project_id=project.group_id,
            name='Boomerang Score #{}'.format(project.group_id),
            flag=FLAG_Q_BOOMERANG,
            description='No description available')
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            # At or below the midpoint: exclude workers through one deny
            # qualification per wait-list bucket at or under the threshold.
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:
                    boomerang_blacklist, blacklist_created = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(project.group_id, len(
                                                           WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if blacklist_created and add_boomerang:
                        requirements.append(BoomerangRequirement(
                            qualification_type_id=boomerang_blacklist.type_id,
                            comparator=OP_DNE,
                            integer_value=None))
        elif success and add_boomerang:
            # Only touch boomerang_qual once creation is known to have
            # succeeded: on failure it is None and has no type_id.
            requirements.append(BoomerangRequirement(
                qualification_type_id=boomerang_qual.type_id,
                comparator=OP_GTEQ,
                integer_value=boomerang_threshold))
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        """Create, or re-type, the Mechanical Turk HIT of each pending task.

        Args:
            project: project being published; its name, price, timeout,
                deadline, owner and qualification are read.
            tasks: optional rows shaped like the query result below, i.e.
                ``(id, repetition, group_id, remaining_assignments,
                min_rating)``. When empty, the rows are loaded from the
                database for ``project``.
            repetition: not used by this method.

        Returns:
            ``'SUCCESS'`` when every task was processed, ``'FAILURE'`` when a
            HIT type could not be obtained, ``'FAILED'`` when Mechanical Turk
            rejected the creation of a HIT.
        """
        if not tasks:
            # ``connection`` is the module-level name, not self.connection.
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(
                query, {
                    'skipped': TaskWorker.STATUS_SKIPPED,
                    'rejected': TaskWorker.STATUS_REJECTED,
                    'expired': TaskWorker.STATUS_EXPIRED,
                    'project_id': project.id
                })
            tasks = cursor.fetchall()

        # Boomerang requirements are only attached once at least one
        # requester rating exists.
        rated_workers = Rating.objects.filter(
            origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        duration = project.timeout if project.timeout is not None else datetime.timedelta(
            hours=24)
        lifetime = project.deadline - timezone.now(
        ) if project.deadline is not None else datetime.timedelta(days=7)

        for task in tasks:
            # task[4] is min_rating. Scale first and round afterwards:
            # int(round(x, 2) * 100) truncates binary float error
            # (0.29 -> 28.999... -> 28) and yields a threshold one too low.
            boomerang_threshold = int(round(task[4] * 100))
            question = self.create_external_question(task[0])
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            qualifications, boomerang_qual = self.get_qualifications(
                project=project,
                boomerang_threshold=boomerang_threshold,
                add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(
                title=project.name,
                description=self.description,
                price=project.price,
                duration=duration,
                keywords=self.keywords,
                approval_delay=datetime.timedelta(days=2),
                qual_req=qualifications,
                qualifications_mask=qualifications_mask,
                boomerang_threshold=boomerang_threshold,
                owner_id=project.owner_id,
                boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                try:
                    # task[3] is the number of assignments still needed.
                    hit = self.connection.create_hit(
                        hit_type=hit_type.string_id,
                        max_assignments=task[3],
                        lifetime=lifetime,
                        question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId,
                                         hit_type=hit_type,
                                         task_id=task[0])
                except MTurkRequestError as e:
                    error = e.errors[0][0]
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        # Tell the project owner why publishing stopped.
                        payload = {
                            "type": "ERROR",
                            "detail":
                            "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        redis_publisher = RedisPublisher(facility='bot',
                                                         users=[project.owner])
                        redis_publisher.publish_message(
                            RedisMessage(json.dumps(payload)))
                    return 'FAILED'
            else:
                # The HIT already exists: move it to the current HIT type if
                # that changed since it was created.
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(
                        hit_id=mturk_hit.hit_id,
                        hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self,
                        owner_id,
                        title,
                        description,
                        price,
                        duration,
                        boomerang_threshold,
                        keywords=None,
                        approval_delay=None,
                        qual_req=None,
                        qualifications_mask=0,
                        boomerang_qual=None):
        """Return a matching stored HIT type, registering a new one if needed.

        A stored type is reused when owner, title, description, price,
        duration, qualifications mask and boomerang threshold all match.
        Otherwise the type is registered with Mechanical Turk and saved.

        Returns:
            ``(hit_type, True)`` on success, ``(None, False)`` when
            Mechanical Turk rejects the registration.
        """
        decimal_price = Decimal(str(price))
        stored = MTurkHITType.objects.filter(
            owner_id=owner_id,
            name=title,
            description=description,
            price=decimal_price,
            duration=duration,
            qualifications_mask=qualifications_mask,
            boomerang_threshold=boomerang_threshold).first()
        if stored is not None:
            return stored, True

        reward = Price(price)
        try:
            registered = self.connection.register_hit_type(
                title=title,
                description=description,
                reward=reward,
                duration=duration,
                keywords=keywords,
                approval_delay=approval_delay,
                qual_req=qual_req)[0]
            fresh = MTurkHITType(owner_id=owner_id,
                                 name=title,
                                 description=description,
                                 price=decimal_price,
                                 keywords=keywords,
                                 duration=duration,
                                 qualifications_mask=qualifications_mask,
                                 boomerang_qualification=boomerang_qual,
                                 boomerang_threshold=boomerang_threshold)
            # Keep the id assigned by Mechanical Turk next to the local record.
            fresh.string_id = registered.HITTypeId
            fresh.save()
        except MTurkRequestError:
            return None, False
        return fresh, True

    def create_external_question(self, task, frame_height=800):
        """Build the external question that points at this site's task page.

        Args:
            task: task id; it is hash-encoded before being put in the URL.
            frame_height: height passed through to the external question.

        Returns:
            ``ExternalQuestion`` for ``<host>/mturk/task/?taskId=<hash>``.
        """
        encoder = Hashids(salt=settings.SECRET_KEY,
                          min_length=settings.ID_HASH_MIN_LENGTH)
        hashed_id = encoder.encode(task)
        return ExternalQuestion(
            external_url=self.host + '/mturk/task/?taskId=' + hashed_id,
            frame_height=frame_height)

    def update_max_assignments(self, task):
        """Adjust a task's HIT to the number of assignments still needed.

        * More work is needed and every assignment slot of the HIT has been
          submitted with none in progress: one slot is added and the HIT is
          extended.
        * No work is left: the HIT is expired.
        * More work is needed but the HIT is marked expired: the HIT is
          extended and marked in progress again.

        Args:
            task: mapping with the task's ``'id'``.

        Returns:
            ``'SUCCESS'``.

        Raises:
            MTurkHIT.DoesNotExist: if the task has no HIT.
        """
        task_obj = Task.objects.get(id=task['id'])
        mturk_hit = task_obj.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist(
                "This task is not associated to any mturk hit")
        # Everything that is not rejected, skipped or expired counts as done.
        assignments_completed = task_obj.task_workers.filter(~Q(status__in=[
            TaskWorker.STATUS_REJECTED, TaskWorker.STATUS_SKIPPED,
            TaskWorker.STATUS_EXPIRED
        ])).count()
        remaining_assignments = task_obj.project.repetition - assignments_completed

        # Short-circuits exactly like the original condition, so the counts
        # are only queried when work remains.
        all_slots_used = (
            remaining_assignments > 0 and
            mturk_hit.num_assignments == mturk_hit.mturk_assignments.filter(
                status=TaskWorker.STATUS_SUBMITTED).count() and
            mturk_hit.mturk_assignments.filter(
                status=TaskWorker.STATUS_IN_PROGRESS).count() == 0)

        if all_slots_used:
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            # Persist the revival; without this the record stays EXPIRED
            # although the HIT was extended.
            mturk_hit.save()
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(
            hit_type=hit_type_id,
            url=self.host + '/api/mturk/notification',
            event_types=[
                'AssignmentReturned', 'AssignmentAbandoned',
                'AssignmentAccepted', 'AssignmentSubmitted'
            ])

    def approve_assignment(self, task_worker):
        """Approve the Mechanical Turk assignment behind a task worker.

        Args:
            task_worker: mapping with the task worker's ``'id'``.

        Returns:
            ``False`` when Mechanical Turk rejects the request; ``True``
            otherwise, including when the task worker has no assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if not hasattr(task_worker_obj, 'mturk_assignments'):
            return True
        # Fetch once: the original ran the same lookup twice, once for the
        # check and once for the use.
        mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is None:
            return True
        try:
            self.connection.approve_assignment(mturk_assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def reject_assignment(self, task_worker):
        """Reject the Mechanical Turk assignment behind a task worker.

        Args:
            task_worker: mapping with the task worker's ``'id'``.

        Returns:
            ``False`` when Mechanical Turk rejects the request; ``True``
            otherwise, including when the task worker has no assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        if not hasattr(task_worker_obj, 'mturk_assignments'):
            return True
        # Fetch once: the original ran the same lookup twice, once for the
        # check and once for the use.
        mturk_assignment = task_worker_obj.mturk_assignments.first()
        if mturk_assignment is None:
            return True
        try:
            self.connection.reject_assignment(mturk_assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id,
                                       assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.NotAuthorized':
                return None, False
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self,
                                  owner_id,
                                  name,
                                  flag,
                                  description,
                                  project_id,
                                  auto_granted=False,
                                  auto_granted_value=None,
                                  deny=False,
                                  bucket=None):
        """Get or create a qualification type and sync worker scores into it.

        Worker ratings given by ``owner_id`` for ``project_id`` are read from
        the database as a weighted average in which each older rating is
        weighted by a further power of ``BOOMERANG_TASK_ALPHA``. Every rated
        worker whose username looks like ``mturk.<worker id>`` is assigned
        the qualification on Mechanical Turk (unless already recorded) and
        the score, scaled by 100, is stored locally.

        Args:
            owner_id: requester whose ratings are used and who owns the type.
            name: qualification type name; with ``owner_id`` and ``flag`` it
                identifies an existing local record.
            flag: flag stored on the local record.
            description: description sent to Mechanical Turk.
            project_id: project group whose ratings are considered.
            auto_granted: passed through to Mechanical Turk.
            auto_granted_value: passed through to Mechanical Turk.
            deny: with ``bucket``, restricts the ratings to the bucket and
                marks a newly created record as a blacklist.
            bucket: ``(lower, upper)`` rating bounds, used only with ``deny``.

        Returns:
            ``(qualification, True)`` on success, ``(None, False)`` when
            Mechanical Turk rejects the creation of the qualification type.
        """
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        params = {
            'origin_type': Rating.RATING_REQUESTER,
            'origin_id': owner_id,
            'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        # Bounds stored on a newly created record (scores are scaled by 100).
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            # Waitlist type: only workers rated inside the bucket qualify.
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({
                'upper_bound': bucket[1] * 100,
                'lower_bound': bucket[0] * 100,
                'is_blacklist': True
            })
        # ``connection`` is the module-level name, not self.connection.
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        worker_ratings = [{
            "worker_id": r[0],
            "worker_username": r[1],
            "rating": r[2]
        } for r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id,
                                                          flag=flag,
                                                          name=name).first()
        assigned_workers = set()
        if qualification is None:
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(
                    owner_id=owner_id,
                    flag=flag,
                    name=name,
                    description=description,
                    auto_granted=auto_granted,
                    auto_granted_value=auto_granted_value,
                    type_id=qualification_type.QualificationTypeId,
                    **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            # Materialise the ids once as a set so that each membership test
            # below is a hash lookup instead of a scan of the result list.
            assigned_workers = set(
                MTurkWorkerQualification.objects.filter(
                    qualification=qualification).values_list('worker',
                                                             flat=True))

        for rating in worker_ratings:
            user_name = rating["worker_username"].split('.')
            # Only usernames shaped like "mturk.<worker id>" are synced.
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                score = int(rating['rating'] * 100)
                if mturk_worker_id not in assigned_workers:
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id,
                        worker_id=mturk_worker_id,
                        value=score)
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': score
                }
                MTurkWorkerQualification.objects.update_or_create(
                    qualification=qualification,
                    worker=mturk_worker_id,
                    defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        try:
            result = self.connection.change_hit_type_of_hit(
                hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg,
                                requester_avg):
        """
        Push a worker's task rating into the project's boomerang qualification.

        The qualification is taken from the HIT type of the first HIT found
        for the project group. An existing worker score is overwritten;
        otherwise a new score is recorded and the qualification assigned.

        Args:
            project_id: project group id
            worker_id: Mechanical Turk worker id
            task_avg: rating to store; it is scaled by 100
            requester_avg: currently unused

        Returns:
            str: always 'SUCCESS'
        """
        hit = MTurkHIT.objects.select_related(
            'hit_type__boomerang_qualification').filter(
                task__project__group_id=project_id).first()
        if hit is None:
            # Nothing is published for this project, so there is no score
            # to update.
            return 'SUCCESS'

        qualification = hit.hit_type.boomerang_qualification
        existing = MTurkWorkerQualification.objects.filter(
            qualification=qualification, worker=worker_id).first()
        scaled_score = int(task_avg * 100)
        if existing is not None:
            self.update_score(existing, score=scaled_score, override=True)
            return 'SUCCESS'

        MTurkWorkerQualification.objects.create(
            qualification=qualification,
            worker=worker_id,
            score=scaled_score,
            overwritten=True)
        self.assign_qualification(
            qualification_type_id=qualification.type_id,
            worker_id=worker_id,
            value=scaled_score)
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(
                worker_qual.qualification.type_id, worker_qual.worker, score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id, value=1):
        """
        Revoke a qualification from a WorkerId
        Args:
            qualification_type_id:
            worker_id:
            value

        Returns:
            bool
        """
        try:
            self.connection.assign_qualification(qualification_type_id,
                                                 worker_id,
                                                 value,
                                                 send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        try:
            self.connection.revoke_qualification(
                qualification_type_id=qualification_type_id,
                subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
class ElicitationPipelineHandler(object):
    def __init__(self):
        """Connect to MTurk, build the pipeline helpers and reserve a batch budget.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_ACCESS_KEY`` environment variables.

        Raises:
            KeyError: if either environment variable is unset.
            IOError: if the account balance does not exceed ``batch_cost``.
            Exception: whatever ``MTurkConnection`` raised, after printing it.
        """
        aws_id = os.environ['AWS_ACCESS_KEY_ID']
        aws_k = os.environ['AWS_ACCESS_KEY']

        try:
            self.conn = MTurkConnection(aws_access_key_id=aws_id,
                                        aws_secret_access_key=aws_k,
                                        host=HOST)
        except Exception as e:
            # Report and re-raise: carrying on without self.conn would only
            # fail on the next line with an unrelated AttributeError.
            print(e)
            raise

        self.ah = AssignmentHandler(self.conn)
        self.th = TurkerHandler(self.conn)
        self.hh = HitHandler(self.conn, TEMPLATE_DIR)
        self.mh = MongoElicitationHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)
        self.balance = self.conn.get_account_balance()[0].amount
        self.batch_cost = 1
        if self.balance > self.batch_cost:
            # Spend at most one batch worth of money in this session.
            self.balance = self.batch_cost
        else:
            raise IOError(
                "MTurk account balance (%s) does not exceed the batch cost (%s)"
                % (self.balance, self.batch_cost))
        self.logger = logging.getLogger(
            "transcription_engine.elicitation_pipeline_handler")

    def load_PromptSource_RawToList(self, prompt_file_uri):
        """Register a prompt source file and one prompt artifact per entry.

        The prompts are read through the prompt handler; the source artifact
        records the file's URI, its size on disk and the number of prompts.
        Each prompt is stored together with its normalized form, line number,
        key and length.
        """
        prompts_by_key = self.ph.get_prompts(prompt_file_uri)
        size_on_disk = os.stat(prompt_file_uri).st_size
        prompt_source_id = self.mh.create_prompt_source_artifact(
            prompt_file_uri, size_on_disk, len(prompts_by_key))
        normalizer = Normalize()
        for prompt_key in prompts_by_key:
            text, line_no = prompts_by_key[prompt_key]
            normalized_text = normalizer.rm_prompt_normalization(text)
            self.mh.create_prompt_artifact(
                prompt_source_id, text, normalized_text, line_no, prompt_key,
                len(text))

    def load_assignment_hit_to_submitted(self):
        """Record newly submitted assignments of known elicitation HITs.

        For every HIT on the account that has an ``elicitation_hits``
        artifact, each assignment without an ``elicitation_assignments``
        artifact is processed: a worker artifact is created, one recording
        source artifact is created per submitted recording, and finally the
        assignment artifact itself. Assignments that already have an artifact
        are skipped, so re-running this does not recreate them.

        Prints the assignment ids seen for each known HIT.
        """
        hits = self.conn.get_all_hits()
        for hit in hits:
            # NOTE(review): transcription_dicts is never read in this method.
            transcription_dicts = [{}]
            hit_id = hit.HITId
            # Only HITs that have an elicitation_hits artifact are handled.
            if self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                assignments = self.conn.get_assignments(hit_id)
                # NOTE(review): have_all_assignments is written but never read.
                have_all_assignments = True
                assignment_ids = []
                for assignment in assignments:
                    assignment_id = assignment.AssignmentId
                    assignment_ids.append(assignment_id)
                    if self.mh.get_artifact("elicitation_assignments",
                                            {"_id": assignment.AssignmentId}):
                        # Assignments are created here, so one that already
                        # has an artifact was handled by an earlier run.
                        continue
                    else:
                        have_all_assignments = False
                    recording_ids = []
                    prompt_id_tag = "prompt_id"
                    recording_url_tag = "recording_url"
                    worker_id_tag = "worker_id"
                    recording_dict = self.ah.get_assignment_submitted_text_dict(
                        assignment, prompt_id_tag, recording_url_tag)
                    worker_oid = self.mh.create_worker_artifact(
                        assignment.WorkerId)
                    zipcode = None
                    # Despite its name, recording_dict is iterated as a
                    # sequence of per-recording mappings.
                    for recording in recording_dict:
                        # The entry whose prompt id is "zipcode" carries the
                        # zipcode in its recording_url field; it is not a
                        # recording.
                        if recording[prompt_id_tag] == "zipcode":
                            zipcode = recording[recording_url_tag]
                            continue
                        if not self.mh.get_artifact_by_id(
                                "prompts", recording[prompt_id_tag]):
                            self.logger.info("Assignment(%s) with unknown %s(%s) skipped"%\
                                        (assignment_id,prompt_id_tag,recording[prompt_id_tag]))
                            # Unknown prompt: leave the loop without creating
                            # any assignment artifact (the else below is
                            # skipped on break).
                            break
                        # assumes each recording mapping also has a
                        # "worker_id" key -- TODO confirm against
                        # get_assignment_submitted_text_dict
                        recording_id = self.mh.create_recording_source_artifact(
                            recording[prompt_id_tag],
                            recording[recording_url_tag],
                            recording[worker_id_tag])
                        if not recording_id:
                            # No recording source id came back: store the
                            # assignment flagged incomplete with the
                            # recordings gathered so far, then stop.
                            self.mh.create_assignment_artifact(assignment,
                                                               recording_ids,
                                                               zipcode=zipcode,
                                                               incomplete=True)
                            break

                        self.mh.add_item_to_artifact_set(
                            "prompts", recording[prompt_id_tag],
                            "recording_sources", recording_id)
                        recording_ids.append(recording_id)
                    else:
                        # Runs only when the loop above finished without a
                        # break: store the complete assignment and link it to
                        # its HIT and its worker.
                        self.mh.create_assignment_artifact(assignment,
                                                           recording_ids,
                                                           zipcode=zipcode)
                        self.mh.add_item_to_artifact_set(
                            "elicitation_hits", hit_id,
                            "submitted_assignments", assignment_id)
                        self.mh.add_item_to_artifact_set(
                            "workers", worker_oid, "submitted_assignments",
                            assignment_id)
                print("Elicitation HIT(%s) submitted assignments: %s " %
                      (hit_id, assignment_ids))

    def approve_assignment_submitted_to_approved(self):
        """Approve every assignment whose stored artifact is still "Submitted".

        Walks all HITs on the account, ignores those without an
        ``elicitation_hits`` artifact, approves each matching assignment on
        MTurk and stamps ``approval_time`` on its artifact with the current
        local time.
        """
        thanks = "Thank you for completing this assignment!"
        for hit in self.conn.get_all_hits():
            hit_id = hit.HITId
            if not self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                continue
            for assignment in self.conn.get_assignments(hit_id):
                assignment_id = assignment.AssignmentId
                submitted_query = {"_id": assignment_id, "state": "Submitted"}
                if not self.mh.get_artifact("elicitation_assignments",
                                            submitted_query):
                    continue
                #WARNING: this Approves every assignment
                self.conn.approve_assignment(assignment_id, thanks)
                self.mh.update_artifact_by_id(
                    "elicitation_assignments", assignment_id,
                    "approval_time", datetime.datetime.now())

    def approve_assignment_by_worker(self):
        """Review submitted assignments, by worker state or by hand.

        For every "Submitted" assignment of a known elicitation HIT:

        * if the worker's state is "Approved" the assignment is approved;
        * if the worker's state is "Rejected" the assignment is rejected with
          the worker's stored rejection reason;
        * otherwise the recordings are played with ``gnome-mplayer`` and the
          operator is asked whether to approve (y), reject (n) or skip (s)
          the assignment, and optionally the worker's future work.

        Assignments whose first recording URI ends in " .wav" or ".com.wav"
        are skipped without review.
        """
        approval_comment = "Thank you for your recordings, good work, assignment approved!"
        denial_comment = "I'm sorry but your work was denied because %s"
        for hit in self.conn.get_all_hits():
            hit_id = hit.HITId
            if not self.mh.get_artifact("elicitation_hits", {"_id": hit_id}):
                continue
            for assignment in self.conn.get_assignments(hit_id):
                assignment_id = assignment.AssignmentId
                if not self.mh.get_artifact("elicitation_assignments", {
                        "_id": assignment_id,
                        "state": "Submitted"
                }):
                    continue
                assignment_artifact = self.mh.get_artifact(
                    "elicitation_assignments", {"_id": assignment_id})
                recording_ids = assignment_artifact["recordings"]
                worker = self.mh.get_artifact(
                    "workers", {"eid": assignment_artifact["worker_id"]})
                if worker["state"] == "Approved":
                    #If the worker is approved, approve the assignment automatically
                    self.conn.approve_assignment(assignment_id,
                                                 approval_comment)
                    self.mh.update_artifact_by_id(
                        "elicitation_assignments", assignment_id,
                        "approval_time", datetime.datetime.now())
                    continue
                elif worker["state"] == "Rejected":
                    self.conn.reject_assignment(assignment_id,
                                                worker["rejection_reason"])
                    # NOTE(review): the rejection is stamped under
                    # "approval_time", as in the original code -- confirm
                    # that this is the intended field.
                    self.mh.update_artifact_by_id(
                        "elicitation_assignments", assignment_id,
                        "approval_time", datetime.datetime.now())
                    continue
                recording_uris = [
                    self.mh.get_artifact_by_id("recording_sources",
                                               recording_id, "recording_uri")
                    for recording_id in recording_ids
                ]
                # The emptiness check must guard BOTH suffix tests; the old
                # "A and B or C" form indexed recording_uris[0] on an empty
                # list and raised IndexError.
                if recording_uris and recording_uris[0].endswith(
                        (" .wav", ".com.wav")):
                    continue
                command = ["gnome-mplayer"] + recording_uris
                print("Calling: %s" % command)
                call(command)
                approve_assignment = raw_input("Approve assignment(y/n/s)?")
                if approve_assignment == "s":
                    #skip
                    continue
                elif approve_assignment == "y":
                    #accept the assignment
                    self.conn.approve_assignment(assignment_id,
                                                 approval_comment)
                    self.mh.update_artifact_by_id(
                        "elicitation_assignments", assignment_id,
                        "approval_time", datetime.datetime.now())
                    approve_worker = raw_input("Approve worker(y/n)?")
                    if approve_worker == "y":
                        #approve the worker and all future assignments
                        self.mh.update_artifact_by_id(
                            "workers", worker["_id"], "approval_time",
                            datetime.datetime.now())
                elif approve_assignment == "n":
                    #Reject the assignment
                    reject_worker = raw_input(
                        "Reject this worker's future work?")
                    if reject_worker == "y":
                        #Reject the worker
                        reason = raw_input(
                            "Reason for rejecting this worker's future work:"
                        )
                        self.mh.update_artifact_by_id(
                            "workers", worker["_id"], "rejection_reason",
                            reason)
                    else:
                        reason = raw_input("Why reject the assignment?")
                    self.conn.reject_assignment(
                        assignment_id, denial_comment % reason + ".")

    def get_assignment_stats(self):
        """Report the effective hourly wage at a reward of .20 per assignment."""
        reward_per_assignment = .20
        self.effective_hourly_wage_for_approved_assignments(
            reward_per_assignment)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments.

        The mean time between ``AcceptTime`` and ``SubmitTime`` over all
        "Approved" assignment artifacts is converted into the amount a worker
        would earn per hour at ``reward_per_assignment``. The figures are
        also printed.

        Args:
            reward_per_assignment: amount paid for one assignment.

        Returns:
            float: the effective hourly wage, or None when there are no
            approved assignments or their total duration is zero (the wage
            is undefined in that case).
        """
        time_format = "%Y-%m-%dT%H:%M:%SZ"
        approved_assignments = self.mh.get_artifacts_by_state(
            "elicitation_assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                  time_format)
            submitted = datetime.datetime.strptime(assignment["SubmitTime"],
                                                   time_format)
            total += submitted - accepted
            count += 1
        total_seconds = total.total_seconds()
        # Both divisions below would raise ZeroDivisionError otherwise.
        if count == 0 or total_seconds == 0:
            print("No approved assignments with a measurable duration; "
                  "effective hourly wage is undefined")
            return None
        seconds_per_assignment = total_seconds / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))
        return effective_hourly_wage

    def enqueue_prompts_and_generate_hits(self):
        """Queue every "New" prompt and create HITs while the budget allows.

        After each prompt is queued, the queue is turned into prompt pairs.
        When pairs are available (and ``cost_sensitive`` is set) the HIT cost
        is estimated; prompts already in a HIT are removed from the queue,
        otherwise a HIT is created if the remaining balance covers it.

        Returns:
            True as soon as the remaining balance cannot cover the next HIT;
            otherwise None after printing the amount left in the batch.
        """
        hit_title = "Audio Elicitation"
        question_title = "Speak and Record your Voice"
        hit_description = "Speak the prompt and record your voice."
        keywords = "audio, elicitation, speech, recording"
        reward_per_clip = 0.04
        max_assignments = 2

        for prompt in self.mh.get_artifacts_by_state("prompts", "New"):
            self.mh.enqueue_prompt(prompt["_id"], 1, 5)
            prompt_queue = self.mh.get_prompt_queue()
            prompt_pairs = self.mh.get_prompt_pairs(prompt_queue)
            if not prompt_pairs:
                continue
            if not cost_sensitive:
                continue
            estimated_cost = self.hh.estimate_html_HIT_cost(
                prompt_pairs,
                reward_per_clip=reward_per_clip,
                max_assignments=max_assignments)
            prompts_in_hits = self.mh.prompts_already_in_hit(prompt_pairs)
            if prompts_in_hits:
                #If one or more clips are already in a HIT, remove it from the queue
                self.mh.remove_artifact_from_queue(prompts_in_hits)
                continue
            if not (self.balance - estimated_cost >= 0):
                # Out of budget: stop without the summary line.
                return True
            #if we have enough money, create the HIT
            response = self.hh.make_html_elicitation_HIT(
                prompt_pairs,
                hit_title,
                question_title,
                keywords,
                hit_description,
                max_assignments=max_assignments,
                reward_per_clip=reward_per_clip)
            # The cost is deducted whether or not the response is valid.
            self.balance = self.balance - estimated_cost
            created = (type(response) == ResultSet and len(response) == 1
                       and response[0].IsValid)
            if not created:
                continue
            new_hit = response[0]
            self.mh.remove_artifacts_from_queue("prompt_queue", prompt_queue)
            prompt_ids = [queued["prompt_id"] for queued in prompt_queue]
            hit_id = new_hit.HITId
            hit_type_id = new_hit.HITTypeId
            self.mh.create_elicitation_hit_artifact(hit_id, hit_type_id,
                                                    prompt_ids)
            self.mh.update_artifacts_by_id("prompts", prompt_ids, "hit_id",
                                           hit_id)
            self.logger.info("Successfully created HIT: %s" % hit_id)
        print("Amount left in batch: %s out of %s" %
              (self.balance, self.batch_cost))

    def allhits_liveness(self):
        """Interactively disable HITs on the account.

        The operator is first asked whether to remove every HIT that has no
        assignments. On "y" all such HITs are disabled, their elicitation
        artifacts removed and their prompts' state updated, and True is
        returned. Otherwise each HIT is listed and the operator decides per
        HIT; a HIT that has assignments is only disabled on MTurk, its
        artifacts are left alone.
        """

        def remove_empty_hit(hit_id):
            # Disable the HIT, drop its artifact and update the state of
            # the prompts it held.
            self.conn.disable_hit(hit_id)
            prompts = self.mh.get_artifact("elicitation_hits",
                                           {"_id": hit_id}, "prompts")
            self.mh.remove_elicitation_hit(hit_id)
            if prompts:
                self.mh.update_artifacts_state("prompts", prompts)

        hits = self.conn.get_all_hits()
        if raw_input("Remove all hits with no assignments?") == "y":
            for hit in hits:
                if len(self.conn.get_assignments(hit.HITId)) == 0:
                    remove_empty_hit(hit.HITId)
            return True

        for hit in hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            submitted_count = len(self.conn.get_assignments(hit_id))
            if submitted_count == 0:
                answer = raw_input(
                    "Remove hit with no submitted assignments?(y/n)")
                if answer == "y":
                    remove_empty_hit(hit_id)
            else:
                answer = raw_input(
                    "Remove hit with %s submitted assignments?(y/n)" %
                    submitted_count)
                if answer == "y":
                    self.conn.disable_hit(hit_id)

    def run(self):
        """Show the pipeline menu until the operator picks an unknown option.

        Options "1" to "7" run the matching pipeline step; "8" or any other
        input ends the loop.
        """
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        # selection -> (method name, positional arguments); the method is
        # looked up on self only when its option is chosen.
        steps = {
            "1": ("load_PromptSource_RawToList", (prompt_file_uri, )),
            "2": ("enqueue_prompts_and_generate_hits", ()),
            "3": ("load_assignment_hit_to_submitted", ()),
            "4": ("allhits_liveness", ()),
            "5": ("approve_assignment_submitted_to_approved", ()),
            "6": ("get_assignment_stats", ()),
            "7": ("approve_assignment_by_worker", ()),
        }
        #self.get_time_submitted_for_assignments()
        while True:
            selection = raw_input(
                """Prompt Source raw to Elicitations-Approved Pipeline:\n
                                     1: PromptSource-Load_RawToList: Load Resource Management 1 prompt source files to queueable prompts
                                     2: Prompt-ReferencedToHit: Queue all referenced prompts and create a HIT if the queue is full.
                                     3: Prompt-HitToAssignmentSubmitted: Check all submitted assignments for Elicitations and download elicitations.
                                     4: Maintain all assignments and hits.
                                     5: (WARNING, approves all assignments) Approve all submitted assignments.
                                     6: Calculate assignment stats.
                                     7: Hand approve submitted assignments by elicitation and/or by worker. 
                                     8: Exit
                                    """)
            step = steps.get(selection)
            if step is None:
                # "8" and every unrecognised answer leave the menu.
                break
            method_name, arguments = step
            getattr(self, method_name)(*arguments)


#    prompt_dict = self.ph.get_prompts(prompt_file_uri)

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("elicitation_assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("elicitation_assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
Exemplo n.º 11
0
class HitCreator():
    """Creates, deletes and processes the "fix" and "check" HITs of audio snippets."""

    def __init__(self):
        """Connect to the MTurk sandbox or production host, based on settings."""
        if settings.IS_DEV_ENV or settings.USE_AMT_SANDBOX:
            HOST = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            HOST = 'mechanicalturk.amazonaws.com'

        self.connection = MTurkConnection(
                aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
                aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
                host=HOST)

    def createHitFrom(self, audioSnippet, hitType, numIncorrectWords=None):
        """Create a "fix" or "check" HIT for ``audioSnippet`` and store its id.

        Args:
            audioSnippet: snippet the HIT is about; its ``pk`` goes into the
                HIT URL and its ``activeHITId`` is set to the new HIT's id.
            hitType: "fix" or "check".
            numIncorrectWords: number of words marked incorrect; required
                (as an int) for "fix" HITs, where it determines the reward.

        Raises:
            AssertionError: for an unknown ``hitType``, a non-int
                ``numIncorrectWords`` on a "fix" HIT, or an unexpected number
                of results from MTurk.
        """
        if hitType == "fix":
            suffix = "fixHIT"
            # Half a cent per incorrect word, clamped to the 0.02-0.05 range.
            assert isinstance(numIncorrectWords, int)
            amount = max(min(.05, numIncorrectWords*.005), .02)
        elif hitType == "check":
            suffix = "checkHIT"
            amount = 0.05
        else:
            # Raised explicitly so the check survives "python -O", which
            # strips assert statements.
            raise AssertionError("unknown hitType: %r" % (hitType,))

        if settings.IS_DEV_ENV:
            baseurl = 'https://localhost:5000/hit/' + suffix
        else:
            baseurl = "https://transcroobie.herokuapp.com/hit/" + suffix
        title = "Transcribe a short audio clip."
        description = "Transcribe the audio. Words may be cut off at the beginning"\
                      " or end of the segment. Do not worry about correctly"\
                      " transcribing these words."
        keywords = ["transcription"]
        frame_height = 800

        thisDocUrl = baseurl + "?docId=" + str(audioSnippet.pk)
        questionform = ExternalQuestion(thisDocUrl, frame_height)

        resultSet = self.connection.create_hit(
            title=title,
            description=description,
            keywords=keywords,
            max_assignments=1,
            question=questionform,
            reward=Price(amount=amount),
            response_groups=('Minimal', 'HITDetail'),  # I don't know what response groups are
        )
        assert len(resultSet) == 1
        audioSnippet.activeHITId = resultSet[0].HITId
        audioSnippet.save()

    def deleteHit(self, hitID):
        """Disable the HIT; an MTurk error is reported on stdout and ignored."""
        try:
            self.connection.disable_hit(hitID)
        except MTurkRequestError as e:
            # Single-argument print(...) gives the same output on Python 2
            # and Python 3.
            print("HIT already deleted %s" % (e,))

    def deleteAllHits(self):
        """Disable every HIT on the account."""
        # Materialise the listing before disabling anything.
        allHits = list(self.connection.get_all_hits())
        for hit in allHits:
            # Two spaces keep the output of the former
            # `print "Disabling hit ", id` statement unchanged.
            print("Disabling hit  %s" % (hit.HITId,))
            self.deleteHit(hit.HITId)

    def processHit(self, questionFormAnswers):
        """Apply one completed HIT's answers to its audio snippet.

        Process each HIT only once. This function will set activeHITId to ""
        to let you know that the HIT is completed and processed. When the
        snippet is still incomplete afterwards, the follow-up HIT is created
        (a "fix" after a "check", a "check" after a "fix").
        """
        hitType = None
        response = None
        # NOTE(review): stays None when no "asFileId" answer is present; the
        # code below then fails on attribute access.
        audioSnippet = None
        fixWords = {}
        for questionFormAnswer in questionFormAnswers:
            if questionFormAnswer.qid == "asFileId":
                asFileId = questionFormAnswer.fields[0]
                audioSnippet = get_object_or_404(AudioSnippet, pk = asFileId)
            elif questionFormAnswer.qid == "fixedHITResult":
                hitType = "fix"
                response = None # need to look at word_%d based on audiosnippet
            elif questionFormAnswer.qid.startswith("word_"):
                fixWords[questionFormAnswer.qid] = questionFormAnswer.fields[0]
            elif questionFormAnswer.qid == "checkedHITResult":
                hitType = "check"
                responseStr = questionFormAnswer.fields[0]
                response = [val == 'true' for val in responseStr.split(',')]

        numIncorrectWords = 0
        if hitType == "fix":
            # Get the list of words marked incorrect, and count them
            incorrectWords = audioSnippet.incorrectWords['bools'][-1]
            numIncorrectWords = len(incorrectWords)-sum(incorrectWords)

            # Get the last prediction to interpret incorrectWords
            prediction = audioSnippet.predictions[-1].split()

            # Convert the last prediction to what was actually sent to
            # the user
            predictionSpaced = transcriptWithSpacesAndEllipses(prediction)
            assert len(incorrectWords) == len(predictionSpaced)
            words, isCorrect = combineConsecutiveDuplicates(predictionSpaced,
                    incorrectWords)

            lastIndex = len(words) - 1
            pieces = []
            for i, word in enumerate(words):
                if not isCorrect[i]:
                    # Incorrect word: take the worker's replacement.
                    pieces.append(fixWords["word_" + str(i)] + " ")
                    continue
                # Only add punctuation (" ") and ellipses if marked incorrect
                if word.isspace() or word == "":
                    continue
                elif i == 0 and word.startswith("..."):
                    word = word[3:] # remove initial ellipses
                elif i == lastIndex and word.endswith("..."):
                    word = word[:-3] # remove trailing ellipses
                pieces.append(word.strip() + " ")
            response = "".join(pieces)
            audioSnippet.predictions.append(response)

            # Always do a check after a fix
            completionStatus = CompletionStatus.incomplete
        else:
            audioSnippet.incorrectWords['bools'].append(response)
            completionStatus = self.getCompletionStatus(audioSnippet, response)
            if completionStatus == CompletionStatus.correct:
                audioSnippet.hasBeenValidated = True
                audioSnippet.isComplete = True
            elif completionStatus == CompletionStatus.givenup:
                audioSnippet.hasBeenValidated = False
                audioSnippet.isComplete = True
        audioSnippet.activeHITId = ""

        if completionStatus == CompletionStatus.incomplete:
            if hitType == "check":
                # CHECK task complete. Create a FIX task (since not # hasBeenValidated)
                self.createHitFrom(audioSnippet, 'fix', numIncorrectWords)
            elif hitType == "fix":
                # FIX task complete. Create a CHECK task.
                self.createHitFrom(audioSnippet, 'check')

        audioSnippet.save()

    def getCompletionStatus(self, audioSnippet, response):
        """Return the CompletionStatus of a snippet after a "check" HIT.

        Only call when hitType == "check". ``correct`` when every entry of
        ``response`` is true, ``givenup`` when the snippet already has more
        than MAX_NUM_PREDICTIONS predictions, ``incomplete`` otherwise.
        """
        MAX_NUM_PREDICTIONS = 2

        completionStatus = CompletionStatus.incomplete
        if all(response):
            completionStatus = CompletionStatus.correct
        elif len(audioSnippet.predictions) > MAX_NUM_PREDICTIONS:
            completionStatus = CompletionStatus.givenup
        return completionStatus

    def processHits(self, doc):
        """ Returns whether or not the doc had a newly-completed HIT
            which was processed.

            Once every snippet of the doc is validated or complete, the
            snippets' last predictions are combined into
            doc.completeTranscript and the doc is saved. """
        assert not doc.completeTranscript
        audioSnippets = doc.audioSnippets.order_by('id')

        newHITCompleted = False
        for audioSnippet in audioSnippets:
            hitID = audioSnippet.activeHITId
            if not hitID: continue

            try:
                hit = self.connection.get_hit(hitID)
            except MTurkRequestError as e:
                logger.error("Perhaps this HIT no longer exists: " + str(e))
                continue

            asgnForHit = self.connection.get_assignments(hit[0].HITId)
            if asgnForHit:
                # Hit is ready. Get the data.
                for asgn in asgnForHit:
                    questionFormAnswers = asgn.answers[0]
                    self.processHit(questionFormAnswers)
                    newHITCompleted = True

        # Each test must look at every snippet. The earlier code looped
        # "for s in statuses" while reading a leaked variable `a`, i.e. it
        # checked only the last snippet on Python 2 and raised NameError on
        # Python 3.
        if all(a.hasBeenValidated for a in audioSnippets) or \
                all(a.isComplete for a in audioSnippets):
            # Note: if the conditional is not met, predictions may be an empty
            # array. Don't run this next line outside of this conditional.
            # (Happens only in a race condition after the audioSnippet is
            # uploaded, and before it adds its first prediction.)
            responses = [a.predictions[-1] for a in audioSnippets]

            # All tasks complete for first time
            totalString = overlap.combineSeveral(responses)
            doc.completeTranscript = totalString
            doc.save()

        return newHITCompleted

    def isTaskReady(self, hitID):
        """Return True when the HIT has at least one assignment."""
        return len(self.connection.get_assignments(hitID)) > 0

    def approveAllHits(self):
        """Approve every assignment of every HIT; MTurk errors are logged."""
        for assignment in self.getAllAssignments():
            try:
                self.connection.approve_assignment(assignment.AssignmentId)
            except MTurkRequestError as e:
                # Maybe already approved?
                logger.error("MTurk Request Error: " + str(e))

    def checkIfHitsReady(self):
        """Always returns True."""
        return True

    def getAllAssignments(self):
        """Yield every assignment of every HIT on the account."""
        allHits = list(self.connection.get_all_hits())

        for hit in allHits:
            assignments = self.connection.get_assignments(hit.HITId)
            for assignment in assignments:
                yield assignment
Exemplo n.º 12
0
class MTurk(object):
    """Wrapper around a boto ``MTurkConnection`` configured from an app object.

    Credentials and the sandbox flag are read from ``app.config`` (keys
    ``MTURK_ACCESS_ID``, ``MTURK_SECRET_KEY`` and ``MTURK_SANDBOX``).  Every
    public operation reconnects through :meth:`connect_to_turk` first and
    reports failure through its return value instead of raising: either
    ``False`` or ``dict(success=False, message=...)`` depending on the method.
    That mix is kept as-is for backward compatibility with existing callers.
    """

    def __init__(self, app=None):
        """Create the wrapper and, when ``app`` is given, configure it at once."""
        self.host = 'https://mechanicalturk.sandbox.amazonaws.com'
        self.secret_key = None
        self.access_id = None
        self.app = app

        # Safe defaults so that every method can be called before init_app()
        # has run (previously this raised AttributeError).
        self.aws_access_key_id = None
        self.aws_secret_access_key = None
        self.is_sandbox = True
        self.valid_login = None
        self.mtc = None

        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Read the MTurk settings from ``app.config`` and verify the login.

        Missing keys are filled in with defaults (no credentials, sandbox on).
        The result of :meth:`verify_aws_login` is stored in ``valid_login``.
        """
        app.config.setdefault('MTURK_SECRET_KEY', None)
        app.config.setdefault('MTURK_ACCESS_ID', None)
        app.config.setdefault('MTURK_SANDBOX', True)
        self.update_credentials(app.config['MTURK_ACCESS_ID'],
                                app.config['MTURK_SECRET_KEY'])
        self.is_sandbox = app.config['MTURK_SANDBOX']
        self.valid_login = self.verify_aws_login()

    def update_credentials(self, aws_access_key_id, aws_secret_access_key):
        """Store the AWS key pair used for all subsequent connections."""
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key

    def verify_aws_login(self):
        """Check the stored credentials against the production endpoint.

        Returns:
            ``True`` when the balance request succeeds,
            ``dict(success=False, message=...)`` when MTurk rejects it, and
            ``None`` (after logging a warning) when no keys are configured.
        """
        if ((self.aws_secret_access_key is None)
                or (self.aws_access_key_id is None)):
            logging.warning('No AWS keys found in app configuration')
            return None

        # The check always targets the production host, regardless of the
        # sandbox flag.
        host = 'mechanicalturk.amazonaws.com'
        params = dict(aws_access_key_id=self.aws_access_key_id,
                      aws_secret_access_key=self.aws_secret_access_key,
                      host=host)
        self.mtc = MTurkConnection(**params)
        try:
            self.mtc.get_account_balance()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        return True

    def connect_to_turk(self):
        """Open a fresh connection to the sandbox or production endpoint.

        Returns:
            ``True`` when ``self.mtc`` was (re)created, ``False`` when the
            login has not been verified successfully.
        """
        # verify_aws_login() reports a rejected login as a non-empty dict,
        # which is truthy; only an explicit True counts as a valid login.
        if self.valid_login is not True:
            logging.warning(
                'Sorry, unable to connect to Amazon Mechanical Turk. Please check your credentials'
            )
            return False
        if self.is_sandbox:
            host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            host = 'mechanicalturk.amazonaws.com'

        mturkparams = dict(aws_access_key_id=self.aws_access_key_id,
                           aws_secret_access_key=self.aws_secret_access_key,
                           host=host)
        self.mtc = MTurkConnection(**mturkparams)
        return True

    @staticmethod
    def _hit_to_record(hit):
        """Wrap the summary fields of one boto HIT in an ``MTurkHIT``."""
        return MTurkHIT({
            'hitid': hit.HITId,
            'title': hit.Title,
            'status': hit.HITStatus,
            'max_assignments': hit.MaxAssignments,
            'number_assignments_completed': hit.NumberOfAssignmentsCompleted,
            'number_assignments_pending': hit.NumberOfAssignmentsPending,
            'number_assignments_available': hit.NumberOfAssignmentsAvailable,
            'creation_time': hit.CreationTime,
            'expiration': hit.Expiration,
        })

    def get_account_balance(self):
        """Return the account balance, or a failure dict on error."""
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            balance = self.mtc.get_account_balance()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        return balance

    def get_reviewable_hits(self):
        """Return ``MTurkHIT`` records for HITs in Reviewable/Reviewing status.

        Returns a failure dict when the connection or the request fails.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

        return [
            self._hit_to_record(hit) for hit in hits
            if hit.HITStatus == "Reviewable" or hit.HITStatus == "Reviewing"
        ]

    def get_all_hits(self):
        """ Get all HITs

        Returns ``False`` when the connection fails and a failure dict when
        the request itself is rejected.
        """
        if not self.connect_to_turk():
            return False
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        return [self._hit_to_record(hit) for hit in hits]

    def get_active_hits(self):
        """ Get active HITs (those whose ``expired`` attribute is falsy)

        Returns a failure dict when the connection or the request fails.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hits = self.mtc.get_all_hits()
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        return [self._hit_to_record(hit) for hit in hits if not hit.expired]

    def get_hit(self, hit_id, response_groups=None):
        """Return the first HIT object for ``hit_id``.

        Returns a failure dict when the connection fails and ``False`` when
        the request is rejected.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hit = self.mtc.get_hit(hit_id, response_groups)[0]
        except MTurkRequestError:
            return False
        return hit

    def get_workers(self, assignment_status=None):
        """ Get workers

        Collects the assignments (optionally filtered by
        ``assignment_status``) of the HITs returned by one ``search_hits``
        page of 20 and returns one dict per assignment.  Returns a failure
        dict when the connection fails and ``False`` when the HIT search is
        rejected.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            hits = self.mtc.search_hits(sort_direction='Descending',
                                        page_size=20)
        except MTurkRequestError:
            return False
        hit_ids = [hit.HITId for hit in hits]
        workers_nested = [
            self.mtc.get_assignments(hit_id,
                                     status=assignment_status,
                                     sort_by='SubmitTime',
                                     page_size=100) for hit_id in hit_ids
        ]

        workers = [val for subl in workers_nested
                   for val in subl]  # Flatten nested lists

        worker_data = [{
            'hitId': worker.HITId,
            'assignmentId': worker.AssignmentId,
            'workerId': worker.WorkerId,
            'submit_time': worker.SubmitTime,
            'accept_time': worker.AcceptTime,
            'status': worker.AssignmentStatus,
            # First field of the first answer of the first answer set.
            'completion_code': worker.answers[0][0].fields[0]
        } for worker in workers]
        return worker_data

    def bonus_worker(self, assignment_id, amount, reason=""):
        """ Bonus worker

        Looks up the worker of ``assignment_id`` and grants ``amount`` as a
        bonus.  Returns ``True`` on success, a failure dict otherwise.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            bonus = MTurkConnection.get_price_as_price(amount)
            assignment = self.mtc.get_assignment(assignment_id)[0]
            worker_id = assignment.WorkerId
            self.mtc.grant_bonus(worker_id, assignment_id, bonus, reason)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def approve_worker(self, assignment_id, feedback=None):
        """ Approve worker

        Returns ``True`` on success, a failure dict when the connection
        fails and ``False`` when the approval is rejected.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.approve_assignment(assignment_id, feedback=feedback)
            return True
        except MTurkRequestError:
            return False

    def reject_worker(self, assignment_id):
        """ Reject worker

        Returns ``True`` on success, a failure dict otherwise.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.reject_assignment(assignment_id, feedback=None)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def unreject_worker(self, assignment_id):
        """ Unreject worker

        Approves a previously rejected assignment.  Returns ``True`` on
        success, a failure dict otherwise.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.approve_rejected_assignment(assignment_id)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def assign_qualification(self,
                             qualification_type_id,
                             worker_id,
                             value=1,
                             send_notification=True):
        """Assign a qualification to a worker.

        Returns ``True`` on success, a failure dict otherwise.
        """
        if not self.connect_to_turk():
            return dict(success=False, message='Could not connect to AWS')
        try:
            self.mtc.assign_qualification(qualification_type_id, worker_id,
                                          value, send_notification)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def revoke_qualification(self,
                             subject_id,
                             qualification_type_id,
                             reason=None):
        """Revoke a qualification from ``subject_id``.

        Returns ``True`` on success, ``False`` when the connection fails and
        a failure dict when the request is rejected.
        """
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.revoke_qualification(subject_id, qualification_type_id,
                                          reason)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def notify_worker(self, worker_id, subject, message_text):
        """Send a notification message to ``worker_id``.

        Returns ``True`` on success, ``False`` when the connection fails and
        a failure dict when the request is rejected.
        """
        if not self.connect_to_turk():
            return False
        try:
            self.mtc.notify_workers(worker_id, subject, message_text)
            return True
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)

    def list_workers_with_qualification(self, qualification_type_id):
        """Return the ``SubjectId`` of every holder of the qualification.

        Returns ``False`` when the connection fails and a failure dict when
        the request is rejected.
        """
        if not self.connect_to_turk():
            return False
        try:
            workers = self.mtc.get_all_qualifications_for_qual_type(
                qualification_type_id)
        except MTurkRequestError as e:
            return dict(success=False, message=e.error_message)
        return [w.SubjectId for w in workers]
Exemplo n.º 13
0
class MturkHelper(object):
    """
    This class handles task creation for the Amazon Mechanical Turk service.

    Amazon MTurk is used to crowdsource matching products.

    Initialisation :
        - reference : reference of the product
        - osm_from : the origin osm of a product
        - osm_to : the osm to look into
        - key : when given, ``self.get_task()`` is called to populate
          ``self.task`` (``get_task`` is not defined in this part of the
          class -- presumably provided elsewhere; verify)
        - hitid : stored on the instance as-is
    """
    # SECURITY(review): the non-sandbox branch embeds production AWS
    # credentials in source control.  They are left untouched here so that
    # behaviour does not change, but they should be rotated and loaded from
    # settings or the environment instead.
    if settings.SANDBOX:
        AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
        AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
    else:
        AWS_SECRET_ACCESS_KEY = 'e6/8e5lcCcESPKT/fe6kYkJtf0+7F2w7459WTJ0v'
        AWS_ACCESS_KEY_ID = 'AKIAIP5JQO7FQX6Q7JAQ'

    def __init__(self,
                 reference=None,
                 osm_from=None,
                 osm_to=None,
                 key=None,
                 hitid=None):
        """Store the task parameters and open the MTurk connection."""
        self.reference = reference
        self.osm_from = osm_from
        self.osm_to = osm_to
        self.key = key
        self.hitid = hitid
        if key is None:
            self.task = None
        else:
            self.task = self.get_task()

        self.mtc = MTurkConnection(
            aws_access_key_id=MturkHelper.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=MturkHelper.AWS_SECRET_ACCESS_KEY,
            host=settings.HOST)

    def get_all_reviewable_hits(self):
        """Fetch every page of reviewable HITs and return them as one result set.

        The first page (50 HITs) also carries ``TotalNumResults``, from which
        the number of remaining pages is derived.
        """
        page_size = 50
        hits = self.mtc.get_reviewable_hits(page_size=page_size)
        print("Total results to fetch %s " % hits.TotalNumResults)
        print("Request hits page %i" % 1)

        # Ceiling division: a partially filled last page still counts.
        full_pages, remainder = divmod(int(float(hits.TotalNumResults)),
                                       page_size)
        total_pages = full_pages + (1 if remainder else 0)

        for pn in range(2, total_pages + 1):
            print("Request hits page %i" % pn)
            temp_hits = self.mtc.get_reviewable_hits(page_size=page_size,
                                                     page_number=pn)
            hits.extend(temp_hits)

        return hits

    def get_hits(self, validate=False, all_hits=False):
        """Walk HITs and record the workers' ``flagged`` answers.

        Args:
            validate: when true, assignments of HITs that have no matching
                ``Task`` are approved, and every visited HIT is disabled.
            all_hits: when true, iterate over all HITs instead of only the
                reviewable ones.

        For a HIT with a matching ``Task``, every value of a ``flagged``
        answer is saved as the ``reference`` of a ``ResultTask``.
        """
        if all_hits:
            hits = self.mtc.get_all_hits()
        else:
            hits = self.get_all_reviewable_hits()

        for hit in hits:
            print("####################")
            print("--------------------")
            print("HitId = %s" % (hit.HITId))
            assignments = self.mtc.get_assignments(hit.HITId)
            # Getting task associated to hit
            tasks = Task.objects.filter(hitId=hit.HITId)
            print('Number of corresponding tasks = %d' % len(tasks))
            task = tasks[0] if len(tasks) > 0 else None

            for assignment in assignments:
                print("AssignmentId = %s" % (assignment.AssignmentId))
                print("Answers of the worker %s" % assignment.WorkerId)
                for question_form_answer in assignment.answers[0]:
                    if question_form_answer.qid != 'flagged':
                        continue
                    for value in question_form_answer.fields:
                        if task is not None:
                            # Saving resultTask
                            print('Saving result task, result = %s' % (value))
                            resulttask, created = ResultTask.objects.get_or_create(
                                task=task,
                                assignementId=assignment.AssignmentId,
                                workerId=assignment.WorkerId)
                            resulttask.reference = value
                            resulttask.save()
                        elif validate:
                            # Best effort: report the failure and carry on.
                            try:
                                self.mtc.approve_assignment(
                                    assignment.AssignmentId)
                            except Exception as e:
                                print(e)
            try:
                if validate:
                    self.mtc.disable_hit(hit.HITId)
            except Exception as e:
                print(e)

                # NOTE(review): this separator is only printed when
                # disabling the HIT failed -- confirm that is intended.
                print("--------------------")
Exemplo n.º 14
0
        # get information about the HIT
        hit, = conn.get_hit(args.hit, ['HITDetail', 'HITAssignmentSummary'])
        total = int(hit.MaxAssignments)
        pending = int(hit.NumberOfAssignmentsPending)
        complete = int(hit.NumberOfAssignmentsCompleted)
        available = int(hit.NumberOfAssignmentsAvailable)
        logging.info(
            "max:%s/pending:%s/complete:%s/remain:%s",
            total, pending, complete, available)

        # check if we have reached the total
        if total >= args.total:
            logging.info("MaxAssignments = %s, exiting", total)
            break

        # compute how many assignments are currently outstanding
        current = available + pending
        if current < args.concurrent:
            diff = min(args.total - total, args.concurrent - current)
            logging.info("Extending HIT with %s more assignments", diff)
            conn.extend_hit(args.hit, assignments_increment=diff)

        # get submitted assignments and approve them
        if args.approve:
            assignments = conn.get_assignments(args.hit, status="Submitted", page_size=100)
            for assignment in assignments:
                logging.info("Approving assignment %s", assignment.AssignmentId)
                conn.approve_assignment(assignment.AssignmentId, feedback=None)

        time.sleep(args.interval)
Exemplo n.º 15
0
                bonus = bonuses.get_bonus(player_id, in_dir + '/confirmatory/games/', in_dir + '/confirmatory/waiting_games/')
        else:
            data_dir = in_dir + '/games/'
            waiting_dir = in_dir + '/waiting_games/'
            bonus = bonuses.get_bonus(player_id, data_dir, waiting_dir)
        if not re.match('\d{4}-\w{8}-\w{4}-\w{4}-\w{4}-\w{12}', player_id):
            bonus = ''
        if bonus != '':
            total_bonus += bonus
            n_valid += 1
            if player_id not in inactive:
                n_usable += 1
        if bonus != '' and res.AssignmentStatus == 'Submitted':
            if pay:
                print('paying', res.WorkerId, player_id, str(bonus))
                mtc.approve_assignment(res.AssignmentId)
                if bonus > 0:
                    mtc.grant_bonus(res.WorkerId, res.AssignmentId, Price(bonus), message)
            else:
                print(hit_id, res.WorkerId, player_id, str(bonus))
        else:
            review += [(hit_id, res.WorkerId, res.AssignmentStatus, player_id, bonus)]

print()
print('Please review:')
for worker in review:
    print(worker)
print()

print('Total Samples:', n_hits)
print('Valid Samples:', n_valid)
Exemplo n.º 16
0
    '--sandbox',
    action='store_true',
    help=
    'Run the command in the Mechanical Turk Sandbox (used for testing purposes)'
)
parser.add_argument(
    '-p',
    '--profile',
    help=
    'Run commands using specific aws credentials rather the default. To set-up alternative credentials see http://boto3.readthedocs.org/en/latest/guide/configuration.html#shared-credentials-file'
)
args = parser.parse_args()

if args.sandbox:
    if not config.has_section('MTurk'):
        config.add_section('MTurk')
    config.set('MTurk', 'sandbox', 'True')
    mturk_website = 'requestersandbox.mturk.com'

results = pd.read_csv(args.resultsfile, sep='\t')

needapproval = results[results['assignmentstatus'] == 'Submitted']

mtc = MTurkConnection(is_secure=True, profile_name=args.profile)

# TODO: to copy behavior of Java tools, reject any that have an 'x' in the
# 'reject' column and send feedback based on value of 'feedback' column
for a in list(needapproval['assignmentid']):
    print("Approving {}".format(a))
    mtc.approve_assignment(a)
def go():
    """Review file-upload HIT results: download, then accept or reject them.

    Command-line options (from ``parseCommandLine()``) drive the behaviour:
    ``access_id``/``secret_key`` are mandatory, ``skip_approved`` and
    ``skip_rejected`` filter assignments by status, ``download`` fetches the
    uploaded file into ``./results/<image basename>/<assignment id>``, and
    ``accept``/``reject`` actually approve or reject on MTurk (otherwise a
    hint is printed).  Only HITs whose title ends in a ``.jpg``/``.png``
    file name are processed.  A summary of the counters is printed at the
    end.  Exits with status 1 when credentials are missing.
    """
    options = parseCommandLine()
    access_id = options.access_id
    secret_key = options.secret_key

    if access_id is None or secret_key is None:
        print("missing AWS credentials")
        sys.exit(1)

    host = 'mechanicalturk.amazonaws.com'

    mtc = MTurkConnection(aws_access_key_id=access_id,
                          aws_secret_access_key=secret_key,
                          host=host)

    results_dir = "./results"

    # First word of the libmagic description -> extension to append.
    magic_extension_map = {
        'JPEG': '.jpeg',
        'PNG': '.png'
    }

    # NOTE(review): counter() is defined elsewhere; it is advanced with
    # .next(), and the summary below calls .next() once more to read it.
    hit_count = counter()
    assignment_count = counter()
    accept_count = counter()
    reject_count = counter()

    for hit in mtc.get_all_hits():
        hit_count.next()
        tokens = hit.Title.lower().split()
        # An empty or whitespace-only title has no last token; skip it like
        # any other HIT that does not name an image file.
        if not tokens or not tokens[-1].endswith(('.jpg', '.png')):
            print("Skipping HIT: " + hit.Title)
            continue
        basename = os.path.splitext(tokens[-1])[0]
        output_dir = os.path.join(results_dir, basename)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        for assignment in mtc.get_assignments(hit.HITId):
            if options.skip_approved and assignment.AssignmentStatus == 'Approved':
                continue

            if options.skip_rejected and assignment.AssignmentStatus == 'Rejected':
                continue

            print("Assignment Status %s" % assignment.AssignmentStatus)
            assignment_count.next()
            assignment_id = assignment.AssignmentId
            output_filename = os.path.join(output_dir, assignment_id)
            url = get_file_upload_url_only(mtc, assignment_id)

            if not url:
                # No uploaded file at all.
                reject_count.next()
                if options.reject:
                    print("   Rejecting " + assignment_id)
                    mtc.reject_assignment(
                        assignment_id, "We require a downloadable file as a result per the instructions. No file found in your submission.")
                else:
                    print(
                        "   No downloadable file found. Use --reject to reject " + assignment_id)
                continue

            if not options.download:
                print("   Use --download to fetch " + url)
                continue

            curl_url_to_output_file(url, output_filename)
            magic_info = magic.from_file(output_filename)
            magic_type = magic_info.split()[0]
            add_extension = magic_extension_map.get(magic_type, '.dat')

            # If we don't get .png, .jpeg, we really can't use the files.

            print("Processing assignment: " + assignment_id)
            if add_extension == '.dat':
                reject_count.next()
                if options.reject:
                    print("   Rejecting " + assignment_id)
                    mtc.reject_assignment(
                        assignment_id, "We require a .png file as a result per the instructions. You submitted " + magic_type)
                else:
                    print("   Use --reject to reject " + assignment_id)
            else:
                accept_count.next()
                if options.accept:
                    print("   Accepting " + assignment_id)
                    mtc.approve_assignment(assignment_id)
                else:
                    print("   Use --accept to accept " + assignment_id)
                # Usable image: give the downloaded file its real extension.
                os.rename(output_filename,
                          output_filename + add_extension)

    print("Total hits = %d; assignments = %d; accept = %d; reject = %d" % (
        hit_count.next(), assignment_count.next(), accept_count.next(), reject_count.next()))
Exemplo n.º 18
0
class MturkHelper(object):
	"""
	This class handles task creation for the Amazon Mechanical Turk service.

	Amazon MTurk is used to crowdsource matching products.

	Initialisation :
		- reference : reference of the product
		- osm_from : the origin osm of a product
		- osm_to : the osm to look into
		- key : when given, ``self.get_task()`` is called to populate
		  ``self.task`` (``get_task`` is not defined in this part of the
		  class -- presumably provided elsewhere; verify)
		- hitid : stored on the instance as-is
	"""
	# SECURITY(review): the non-sandbox branch embeds production AWS
	# credentials in source control.  They are left untouched here so that
	# behaviour does not change, but they should be rotated and loaded from
	# settings or the environment instead.
	if settings.SANDBOX:
		AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
		AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
	else:
		AWS_SECRET_ACCESS_KEY = 'e6/8e5lcCcESPKT/fe6kYkJtf0+7F2w7459WTJ0v'
		AWS_ACCESS_KEY_ID = 'AKIAIP5JQO7FQX6Q7JAQ'


	def __init__(self, reference = None, osm_from = None, osm_to = None, key = None, hitid = None):
		"""Store the task parameters and open the MTurk connection."""
		self.reference = reference
		self.osm_from = osm_from
		self.osm_to = osm_to
		self.key = key
		self.hitid = hitid
		if key is None:
			self.task = None
		else:
			self.task = self.get_task()

		self.mtc = MTurkConnection(aws_access_key_id=MturkHelper.AWS_ACCESS_KEY_ID,
									aws_secret_access_key=MturkHelper.AWS_SECRET_ACCESS_KEY,
									host=settings.HOST)

	def get_all_reviewable_hits(self):
		"""Fetch every page of reviewable HITs and return them as one result set.

		The first page (50 HITs) also carries ``TotalNumResults``, from which
		the number of remaining pages is derived.
		"""
		page_size = 50
		hits = self.mtc.get_reviewable_hits(page_size=page_size)
		print("Total results to fetch %s " % hits.TotalNumResults)
		print("Request hits page %i" % 1)

		# Ceiling division: a partially filled last page still counts.
		full_pages, remainder = divmod(int(float(hits.TotalNumResults)), page_size)
		total_pages = full_pages + (1 if remainder else 0)

		for pn in range(2, total_pages + 1):
			print("Request hits page %i" % pn)
			temp_hits = self.mtc.get_reviewable_hits(page_size=page_size,page_number=pn)
			hits.extend(temp_hits)

		return hits

	def get_hits(self, validate = False, all_hits = False):
		"""Walk HITs and record the workers' ``flagged`` answers.

		Args:
			validate: when true, assignments of HITs that have no matching
				``Task`` are approved, and every visited HIT is disabled.
			all_hits: when true, iterate over all HITs instead of only the
				reviewable ones.

		For a HIT with a matching ``Task``, every value of a ``flagged``
		answer is saved as the ``reference`` of a ``ResultTask``.
		"""
		if all_hits:
			hits = self.mtc.get_all_hits()
		else:
			hits = self.get_all_reviewable_hits()

		for hit in hits:
			print("####################")
			print("--------------------")
			print("HitId = %s"%(hit.HITId))
			assignments = self.mtc.get_assignments(hit.HITId)
			# Getting task associated to hit
			tasks = Task.objects.filter(hitId = hit.HITId)
			print('Number of corresponding tasks = %d'%len(tasks))
			task = tasks[0] if len(tasks) > 0 else None

			for assignment in assignments:
				print("AssignmentId = %s"%(assignment.AssignmentId))
				print("Answers of the worker %s" % assignment.WorkerId)
				for question_form_answer in assignment.answers[0]:
					if question_form_answer.qid != 'flagged':
						continue
					for value in question_form_answer.fields:
						if task is not None:
							# Saving resultTask
							print('Saving result task, result = %s'%(value))
							resulttask, created = ResultTask.objects.get_or_create(task = task, assignementId = assignment.AssignmentId, workerId = assignment.WorkerId)
							resulttask.reference = value
							resulttask.save()
						elif validate:
							# Best effort: report the failure and carry on.
							try:
								self.mtc.approve_assignment(assignment.AssignmentId)
							except Exception as e:
								print(e)
			try:
				if validate:
					self.mtc.disable_hit(hit.HITId)
			except Exception as e:
				print(e)

				# NOTE(review): this separator is only printed when
				# disabling the HIT failed -- confirm that is intended.
				print("--------------------")
Exemplo n.º 19
0
            # 		batch_finished = batch_finished - batch_finished_pre
            # 		print "Batch finished: %i" % batch_finished
            # 		bonus_price = batch_finished*0.5+batch_finished/10*1
            # 		bonus = mtc.get_price_as_price(bonus_price)
            # 		mtc.grant_bonus(assignment.WorkerId, assignment.AssignmentId, bonus, 'good job!')
            # 		print "Bonus granted: $%f" % bonus_price
            # 		print "--------------------"
            # 	except:
            # 		print 'The assignment is already approved or hasn\'t been rejected before'
            # 		print "--------------------"
            # 	continue

            if valid_code(confirm_code[0]):

                try:
                    mtc.approve_assignment(assignment.AssignmentId)
                    print "Worker %s is approved" % assignment.WorkerId
                    print "The confirm code submitted:", confirm_code[0]

                    f.write(assignment.WorkerId)
                    f.write('\t')
                    f.write(confirm_code[0])
                    f.write('\n')

                    batch_finished_pre = get_batch_finished_pre(
                        assignment.WorkerId, worker_code)
                    if assignment.WorkerId not in worker_code:
                        worker_code[assignment.WorkerId] = [confirm_code[0]]
                    else:
                        worker_code[assignment.WorkerId].append(
                            confirm_code[0])
class TranscriptionPipelineHandler():
    def __init__(self):
        """Open the MTurk connection and build the helpers the pipeline uses.

        Credentials are read from the AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY
        environment variables; a missing variable raises KeyError.
        The account balance is fetched once here and cached on the instance.
        """
        access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        secret_access_key = os.environ['AWS_ACCESS_KEY']

        connection = MTurkConnection(
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_access_key,
            host=HOST,
        )
        self.conn = connection

        # Helpers that talk to MTurk share the single connection.
        self.ah = AssignmentHandler(connection)
        self.th = TurkerHandler(connection)
        self.hh = HitHandler(connection, TEMPLATE_DIR)

        # Local storage and audio/prompt helpers.
        mongo_handler = MongoTranscriptionHandler()
        self.mh = mongo_handler
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(mongo_handler)

        balances = connection.get_account_balance()
        self.balance = balances[0].amount
        self.logger = logging.getLogger(
            "transcription_engine.transcription_pipeline_handler")
        
    def audio_clip_referenced_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts_by_state("audio_clips","Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self,priority=1,max_queue_size=10):    
        for audio_clip in self.mh.get_artifacts("audio_clips",{"state":"Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    
    def audio_clip_queue_to_hit(self,cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe" 
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(clip_pairs,reward_per_clip,max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove it from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(clip_pairs,hit_title,
                                                 question_title, description, keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [w["audio_clip_id"] for w in clip_queue]    
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(hit_id,hit_type_id,clip_queue,"New")        
                        self.logger.info("Successfully created HIT: %s"%hit_id)
                        return self.mh.update_audio_clips_state(audio_clip_ids,"Hit")
                else:
                    pass
        return False
            
    def load_assignments_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)  
                if self.mh.get_artifact("assignments",{"_id":assignment.AssignmentId}):
                    #We create assignments here, so if we already have it, skip
                    continue   
                else:
                    have_all_assignments = False                                         
                transcription_ids = []                
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(assignment)   
                if transcription_dicts and len(transcription_dicts)==10:
                    pass             
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"]): 
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        break 
                    self.mh.update_transcription_state(transcription,"Submitted")
                    self.mh.update_audio_clips_state([transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(self.mh.get_artifact("transcriptions",{"audio_clip_id" : transcription["audio_clip_id"],
                                                                        "assignment_id" : transcription["assignment_id"]},
                                                                       "_id"))
                else:
                    self.mh.create_assignment_artifact(assignment,
                                                   transcription_ids,
                                                   "Submitted")
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id,"Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s "%(hit_id,assignment_ids))
            
    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments", "Submitted")
        rejected_feedback = "I'm sorry but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions " +\
                            " had a word error rate above the maximum acceptable"+\
                            " word error rate of %s. Omitted words and words that "+\
                            " differed by more than %s "+\
                            " characters were counted as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions","_id",transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)
            
            approved, average_wer  = self.filter.approve_assignment(transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(assignment_id, accepted_feedback%(assignment_id,average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment,"Approved")    
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id("audio_clips",transcription["audio_clip_id"],"reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(transcription,"Approved")                                          
                    print("Approved transcription ids: %s"%transcription_ids)
            else:
                #Don't deny for now
                feedback = rejected_feedback%(assignment_id,self.filter.WER_THRESHOLD,self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id,feedback)
                self.mh.update_assignment_state(assignment,"Denied")    
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
            
    def _load_rm_audio_source_file_to_clipped(self,file_dir,prompt_file_uri,
                                                   base_clip_dir,sample_rate=16000,
                                                   http_base_url = "http://www.cis.upenn.edu/~tturpen/wavs/",
                                                   init_clip_count = 200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root,f)
                out_uri = system_uri.strip(".sph") + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root,(out_uri))
                spkr_id = str(os.path.relpath(root,file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri,out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: "+str(e))
                        
                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.basename(out_uri).strip(".wav").upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:                        
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(out_uri,
                                                         disk_space,
                                                         length_seconds,
                                                         sample_rate,
                                                         spkr_id,
                                                         encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir,spkr_id,wav_filename)                    
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri,audio_clip_uri)     
                    #http_url
                    http_url = os.path.join(http_base_url,spkr_id,wav_filename)                   
                    clip_id = self.mh.create_audio_clip_artifact(source_id,
                                                       0,
                                                       -1,
                                                       audio_clip_uri,
                                                       http_url,
                                                       length_seconds,
                                                       disk_space)
                    
                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id,clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(clip_id,
                                                                                       transcription_prompt,
                                                                                       "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(clip_id,transcription_id)                    
        
    def all_workers_liveness(self):
        """Interactively show the transcription pairs of every worker.

            For each worker the approved and denied assignments are listed
            and the operator chooses which reference/hypothesis pairs to
            print: 1 shows the denied ones, 2 the accepted ones, 3 both.
            Any other answer prints nothing for that worker.

            The answer is read with raw_input() and compared as text, so
            operator input is never evaluated as Python code."""
        workers = self.mh.get_all_workers()
        for worker in workers:
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)"%(worker["_id"],approved,denied))
            selection = raw_input("1. Show denied transcriptions and references.\n"+
                                    "2. Show accepted transcriptions and references.\n"+
                                    "3. Show both denied and accepted transcriptions.").strip()
            #Keep the branches aligned with the menu text above
            sections = []
            if selection in ("2", "3"):
                sections.append(("Approved transcriptions", approved))
            if selection in ("1", "3"):
                sections.append(("Denied transcriptions", denied))
            for heading, assignment_ids in sections:
                print(heading)
                for assignment_id in assignment_ids:
                    transcription_pairs = self.mh.get_transcription_pairs(assignment_id)
                    for pair in transcription_pairs:
                        print ("Reference:\n\t%s\nHypothesis:\n\t%s\n"%(pair[0],pair[1]))
            
    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved: 
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer/len(approved)
                print("%s,%s"%(len(approved),worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment/total_accepted
        print("Average WER per assignment(%s)"%(av))
        
    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)                    
    
    def effective_hourly_wage_for_approved_assignments(self,reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""        
        approved_assignments = self.mh.get_artifacts_by_state("assignments","Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],"%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(assignment["SubmitTime"],"%Y-%m-%dT%H:%M:%SZ")
            else:
                pass
            total += submitted-accepted
            count += 1
        seconds_per_assignment = total.total_seconds()/count
        effective_hourly_wage = 60.0*60.0/seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s"%(seconds_per_assignment,reward_per_assignment,effective_hourly_wage))        
        
    def allhits_liveness(self):
        """Walk all HITs and ask the operator whether each one should be disabled.

            A HIT is disabled on MTurk when the answer is "y". For a HIT
            without assignments the local transcription HIT record is
            removed as well and its clips go back to "Referenced".
            An MTurkRequestError from MTurk propagates to the caller."""
        for hit in self.conn.get_all_hits():
            hit_id = hit.HITId
            print("HIT ID: %s"%hit_id)
            assignments = self.conn.get_assignments(hit_id)
            assignment_count = len(assignments)
            if assignment_count == 0:
                question = "Remove hit with no submitted assignments?(y/n)"
            else:
                question = "Remove hit with %s submitted assignments?(y/n)"%assignment_count
            if raw_input(question) != "y":
                continue
            self.conn.disable_hit(hit_id)
            if assignment_count == 0:
                #Nothing was submitted, so the clips can be offered again
                clips = self.mh.get_artifact("transcription_hits",{"_id": hit_id},"clips")
                self.mh.remove_transcription_hit(hit_id)
                self.mh.update_audio_clips_state(clips, "Referenced")
                    
    def run(self):
        """Interactive menu loop driving the pipeline stages.

            The menu is shown until the operator enters "11". Each entry
            maps to one pipeline method; unknown entries show the menu
            again. Entry 9 is only available when the class provides
            recalculate_worker_assignment_wer; otherwise a notice is
            printed instead of failing with an AttributeError."""
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        selection = None
        init_clip_count = 10000
        while selection != "11":
            selection = raw_input("""Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """%init_clip_count)
            if selection == "1":
                self._load_rm_audio_source_file_to_clipped(audio_file_dir,
                                                       prompt_file_uri,
                                                       base_clip_dir,init_clip_count=init_clip_count)
            elif selection == "2":
                self.audio_clip_referenced_to_hit()
            elif selection == "3":
                self.load_assignments_hit_to_submitted()
            elif selection == "4":
                self.assignment_submitted_approved()
            elif selection == "5":
                self.allhits_liveness()
            elif selection == "6":
                self.all_workers_liveness()
            elif selection == "7":
                #This is the balance cached on the instance, not a fresh query
                print("Account balance: %s"%self.balance)
            elif selection == "8":
                self.stats()
            elif selection == "9":
                recalculate = getattr(self, "recalculate_worker_assignment_wer", None)
                if recalculate is None:
                    print("Recalculate worker WER is not available.")
                else:
                    recalculate()
            elif selection == "10":
                self.get_assignment_stats()

#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)
                    
#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})        
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#             
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
Exemplo n.º 21
0
    #             worker.base_payment = 0.01
    #             unpaid.append(worker)
    #             total += worker.base_payment
    # for u in unpaid:
    #     if u.assignment_id:
    #         total += u.base_payment + u.bonus_payment
    #===========================================================================
    print "The total unpaid amount for %s workers is $%s" % (len(unpaid),
                                                             total)
    proceed = raw_input("Would you like to pay them now? [Y/N]")
    if proceed == "Y" or proceed == "y":
        for u in unpaid:
            if u.assignment_id:
                base = u.base_payment
                conn.approve_assignment(
                    u.assignment_id,
                    feedback="Thanks for playing the traffic attacker game!")
                print "Approved base payment of $%s for %s" % (base,
                                                               u.assignment_id)
            else:
                print "WARNING: Skipping %s, as assignment ID unknown " % u


def create_HIT_for_specific_worker(worker_ID):
    qualification = conn.create_qualification_type(
        "Qualification for %s" % worker_ID, "Qualification for %s" % worker_ID,
        "Active")
    print qualification
    print qualification[0]
    print qualification[0].QualificationTypeId
    conn.assign_qualification(qualification[0].QualificationTypeId, worker_ID,
Exemplo n.º 22
0
#from analysis_toolbox import *
import ast
from boto.mturk.connection import MTurkRequestError
from boto.mturk.connection import MTurkConnection
import datetime
from secret import SECRET_KEY,ACCESS_KEY,AMAZON_HOST

# Credentials and host come from the local ``secret`` module.
AWS_ACCESS_KEY_ID = ACCESS_KEY
AWS_SECRET_ACCESS_KEY = SECRET_KEY

connection = MTurkConnection(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    host=AMAZON_HOST,
)
print('Connected to AMT')

# Try to approve every assignment of every HIT on the account.
all_hits = list(connection.get_all_hits())
for hit in all_hits:
    assignments = connection.get_assignments(hit.HITId)
    print(assignments)
    for assignment in assignments:
        print("Working on  %s" % (assignment,))
        try:
            connection.approve_assignment(assignment.AssignmentId)
        except MTurkRequestError:
            # Probably already approved or rejected this assignment previously
            print("already approved/rejected")
        else:
            print('approved  %s' % (assignment.AssignmentId,))

Exemplo n.º 23
0
class mTurk():
    """Small convenience wrapper around a boto MTurkConnection.

    Only the sandbox host is supported: constructing the wrapper with
    debug=False raises, so no real money can be spent by accident.
    The methods log through a module-level ``log`` object.
    """
    ACCESS_ID =''
    SECRET_KEY = ''
    HOST = ''
    mtc = None
    log=None
    def __init__(self,access_id,secret_key,debug=True):
        """Store the credentials and connect to the MTurk sandbox.

        Raises Exception when debug is False.
        """
        self.ACCESS_ID=access_id
        self.SECRET_KEY=secret_key
        if debug:
            self.HOST='mechanicalturk.sandbox.amazonaws.com'
        else:
            raise Exception('sure you want to spend money')
        
        self.mtc = MTurkConnection(aws_access_key_id=self.ACCESS_ID,
                              aws_secret_access_key=self.SECRET_KEY,
                              host=self.HOST)
        
    def getBalance(self):
        """Return the account balance exactly as the connection reports it."""
        return self.mtc.get_account_balance()
    
    def hasEnoughMoney(self,cost):
        """Return True when cost is strictly below the available balance."""
        balance = self.getBalance()
        # The balance comes back as a result list; the first entry carries
        # the amount. Comparing cost with the list itself is meaningless.
        return cost < balance[0].amount
    
    
    def approveAssignemnt(self,a_id,feedback="Thanks for contribution"):
        """Approve assignment a_id unless its status is already "Approved"."""
        assignment= self.mtc.get_assignment(a_id)[0]
        log.debug("%s",assignment.__dict__)

        if assignment.AssignmentStatus!="Approved":
            ret = self.mtc.approve_assignment(a_id, feedback)
            log.debug("approve ret: %s",ret)
        else:
            log.debug("already approvred")
            
    def rejectAssigment(self,a_id,feedback="Sorry, but your work was not satifactory"):
        """Reject assignment a_id with the given feedback."""
        ret = self.mtc.reject_assignment(a_id, feedback)
        log.debug("reject ret: %s",ret)

    
    def createExternalQuestion(self,title,description,keywords,url,duration,reward):
        """Create a single-assignment HIT showing url and return the created HIT."""
        ex_q=ExternalQuestion(url,1000)  
        res=self.mtc.create_hit(question=ex_q,max_assignments=1,title=title,description=description,keywords=keywords,duration = duration,reward=reward)   
        log.debug("created external question %s",res[0])
        return res[0]
    
    def getDataFromHit(self, assignmentId, workerId):
        """Return the answers of an assignment as a dict keyed by question id.

        A question with one field maps to that field, a question with
        several fields maps to the list of fields. An empty dict is
        returned when the assignment does not belong to workerId.
        """
        assignment= self.mtc.get_assignment(assignmentId)[0]
        log.debug("Answers of the worker %s",  assignment.WorkerId)
        # Defined up front so a worker mismatch returns {} instead of
        # failing on an unbound local.
        ret = {}
        if assignment.WorkerId == workerId: 
            for question_form_answer in assignment.answers[0]:
                if len(question_form_answer.fields)>1:
                    log.debug("answers are >1")
                    ret[question_form_answer.qid]=question_form_answer.fields
                else:
                    ret[question_form_answer.qid]=question_form_answer.fields[0]
                
        return ret
#                print question_form_answer.qid," - "," ".join(question_form_answer.fields)           
#        # for r in res:        
#            # print "Your hit ID is this %s -> https://workersandbox.mturk.com/mturk/preview?groupId=%s"%(r.HITId,r.HITTypeId))
#        
#    # def printAllHits(self):
#    #        hits=self.mtc.get_all_hits()
#    #        for hit in hits:
#    #            print printAtt(hit,'HITId')            
#            
#  #   def rejectAllHITs(self):
#  #         hits=self.mtc.get_all_hits()
#  #         for hit in hits:
#  #             assignements = self.mtc.get_assignments(hit.HITId)
#  #             for assignment in assignements:
#  #                 try:
#  #                     self.mtc.reject_assignment(assignment.AssignmentId, "i'm just testing this functionality")
#  #                     print "Rejected the assignment %s of HIT %s"%(assignment.AssignmentId,hit.HITId)
#  #                 except Exception:
#  #                     print "ERROR with the assignment %s of HIT %s"%(assignment.AssignmentId,hit.HITId)
#  # #           ret = self.mtc.disable_hit(hit.HITId, "HITDetail")
##            print ret   
Exemplo n.º 24
0
def processHITs(verbose=True,
                approveAll=False,
                deleteAll=False,
                insertComparisons=False):

    mtc = MTurkConnection(host=_host)
    hits = getReviewableHITs(verbose)
    # store hit info here, for persistence
    _hits_vector = []
    _rejected_hits = []
    _flagged_hits = []
    # stats variables
    worker_ids = set()

    for hit in hits:
        assignments = mtc.get_assignments(hit.HITId, page_size=50)
        for assignment in assignments:
            worker_ids.add(assignment.WorkerId)
            if verbose:
                print "Answers of the worker: [%s]" % assignment.WorkerId

            _worker_id = ''
            _worker_exp = 0
            _hit_id = 0
            _assignment_id = ''
            _gui_rating = ''
            _hit_comment = ''
            _hit_rt = 0
            _hit_it = 0
            _trials_results = ''
            _hit_interactions_str = ''
            _hit_reject_flag = False
            _hit_flag = False

            for question_form_answer in assignment.answers[0]:
                key = question_form_answer.qid
                value = question_form_answer.fields

                if key == '_worker_id':
                    _worker_id = value[0]
                    if verbose:
                        print " - Worker ID: [%s]" % (_worker_id)
                elif key == '_worker_exp':
                    _worker_exp = int(value[0])
                    if verbose:
                        print " - Worker experience: [%d]" % (_worker_exp)
                elif key == '_hit_id':
                    _hit_id = int(value[0])
                    if verbose:
                        print " - HIT ID: [%d]" % (_hit_id)
                elif key == '_assignment_id':
                    _assignment_id = value[0]
                    if verbose:
                        print " - Assignment ID: [%s]" % (_assignment_id)
                elif key == '_gui_rating':
                    _gui_rating = value[0]
                    try:
                        _gui_rating = int(_gui_rating)
                    except ValueError:
                        _gui_rating = -1
                    if verbose:
                        print " - GUI rating: [%d/10]" % (_gui_rating)
                elif key == '_hit_comment':
                    _hit_comment = value[0]
                    if verbose:
                        print " - HIT comment: [%s]" % (_hit_comment)
                elif key == '_hit_rt':
                    _hit_rt = int(value[0])
                    if verbose:
                        print " - HIT response time: [%d]" % (_hit_rt)
                elif key == '_hit_it':
                    _hit_it = int(value[0])
                    if verbose:
                        print " - HIT instruction time: [%d]" % (_hit_it)
                elif key == '_trials_results':
                    _trials_results = value[0]
                    if verbose:
                        print " - All HIT's trials results: [%s]" % (
                            _trials_results)
                elif key == '_hit_interactions_str':
                    _hit_interactions_str = value[0]
                    if verbose:
                        print " - HIT interactions string: [%s]" % (
                            _hit_interactions_str)
                elif key == '_hit_reject_flag':
                    _hit_reject_flag = value[0]
                    if str(_hit_reject_flag) == 'false':
                        _hit_reject_flag = False
                    else:
                        _hit_reject_flag = True
                    if verbose:
                        print " - HIT reject flag: [%s]" % (
                            str(_hit_reject_flag))
                elif key == '_hit_flag':
                    _hit_flag = value[0]
                    if _hit_flag == 'Yes':
                        _hit_flag = True
                    else:
                        _hit_flag = False
                    if verbose:
                        print " - HIT information flag: [%s]" % (
                            str(_hit_flag))
                else:
                    print "<----------------------------->"
                    print "ERROR: unknown key [%r]" % (key, )
                    print "Relevant info:"
                    pprint(vars(assignment))
                    pprint(vars(question_form_answer))
                    print "Exiting..."
                    print "<----------------------------->"
                    return

#if insertComparisons:
#    pass
# insert the comparisons into the database

            _hit_data = assignment.__dict__.copy()
            del _hit_data['answers']

            _hit_data['_worker_id'] = _worker_id
            _hit_data['_worker_exp'] = _worker_exp
            _hit_data['_hit_id'] = _hit_id
            _hit_data['_assignment_id'] = _assignment_id
            _hit_data['_gui_rating'] = _gui_rating
            _hit_data['_hit_comment'] = _hit_comment
            _hit_data['_hit_rt'] = _hit_rt
            _hit_data['_hit_it'] = _hit_it
            _hit_data['_trials_results'] = _trials_results
            _hit_data['_hit_interactions_str'] = _hit_interactions_str
            _hit_data['_hit_reject_flag'] = _hit_reject_flag
            _hit_data['_hit_flag'] = _hit_flag

            _hits_vector.append(_hit_data)

            if _hit_reject_flag:
                _rejected_hits.append(_hit_data)
                print "<----------------------------->"
                print "This HIT is low quality - Will be rejected."
                print "Relevant info:"
                pprint(vars(assignment))
                for question_form_answer in assignment.answers[0]:
                    pprint(vars(question_form_answer))
                print "<----------------------------->"
                try:
                    mtc.reject_assignment(assignment.AssignmentId)
                except MTurkRequestError:
                    print "Could not reject [%s]" % (assignment.AssignmentId)
            else:
                if _hit_flag:
                    _flagged_hits.append(_hit_data)
                    print "<----------------------------->"
                    print "This HIT has been flagged by turker."
                    print "Relevant info:"
                    pprint(vars(assignment))
                    for question_form_answer in assignment.answers[0]:
                        pprint(vars(question_form_answer))
                    print "<----------------------------->"

                if approveAll:
                    try:
                        mtc.approve_assignment(assignment.AssignmentId)
                    except MTurkRequestError:
                        print "Could not approve [%s]" % (
                            assignment.AssignmentId)
            if verbose:
                print "<----------------------------->"

            if deleteAll:
                mtc.disable_hit(hit.HITId)

    # print out some stats
    print "Number of HITs = [%d]" % (len(_hits_vector), )
    print "Number of distinct workers = [%d]" % (len(worker_ids), )
    print "Number of rejected HITs = [%d]" % (len(_rejected_hits), )
    print "Number of flagged HITs = [%d]" % (len(_flagged_hits), )

    return_dict = {
        "_all_hits": _hits_vector,
        "_rejected_hits": _rejected_hits,
        "_flagged_hits": _flagged_hits
    }

    if 'MTURK_STORAGE_PATH' in os.environ:
        time_stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
        hit_name = "completed_cocoa_5000"
        filename = os.path.join(os.environ['MTURK_STORAGE_PATH'],
                                hit_name + '_' + time_stamp + ".pkl")
        print "Storing collected hit data at %s" % (filename)
        with open(filename, 'wb') as f:
            pickle.dump(return_dict, f)
    else:
        print "WARNING: MTURK_STORAGE_PATH not set in env. Unable to save hit data."

    return return_dict
Exemplo n.º 25
0
                        help='additional configuration files')
    args = parser.parse_args()

    mturk_cfg_fname = as_project_path('resources/private/mturk.cfg')
    cfg = Config.load_configs([mturk_cfg_fname] + args.configs, log=False)

    print "Approve all outstanding HITs"

    conn = MTurkConnection(
        aws_access_key_id=cfg['MTURK']['aws_access_key_id'],
        aws_secret_access_key=cfg['MTURK']['aws_secret_access_key'],
        host=cfg['MTURK']['host'])

    for pnum in range(1, 50):
        for hit in conn.get_reviewable_hits(page_size=100, page_number=pnum):
            print "HITId:", hit.HITId

            for ass in conn.get_assignments(hit.HITId,
                                            status='Submitted',
                                            page_size=10,
                                            page_number=1):
                #print "Dir ass:", dir(ass)

                if ass.AssignmentStatus == 'Submitted':
                    mturk.print_assignment(ass)

                    print "-" * 100
                    print "Approving the assignment"
                    conn.approve_assignment(ass.AssignmentId)
                    print "-" * 100
Exemplo n.º 26
0
# Note length handed to addChord for each duration answer (1-8) a worker can
# give; any other answer yields a duration of 0.
_NOTE_DURATIONS = {
    1: .375,  # dotted sixteenth
    2: .5,    # eighth
    3: .75,   # dotted eighth
    4: 1,     # quarter
    5: 1.5,   # dotted quarter
    6: 2,     # half
    7: 3,     # dotted half
    8: 4,     # whole
}

# Placeholder passed to addChord instead of a fourth pitch; it is never
# transposed.
# NOTE(review): presumably addChord ignores a pitch of -1 -- confirm there.
_NO_PITCH = -1

# Untransposed pitches for each chord answer (1-8).  The chord/emotion labels
# are the ones the original code carried next to each call.
_CHORD_PITCHES = {
    1: (60, 64, 67, _NO_PITCH),  # C maj   Joy
    2: (60, 63, 67, 70),         # C min9  Sadness
    3: (60, 64, 66, 69),         # C dim7  Anger
    4: (60, 64, 66, _NO_PITCH),  # C flat5 Fear
    5: (60, 64, 67, 69),         # C maj6  Trust
    6: (60, 63, 67, 69),         # C m6    Distrust
    7: (60, 63, 66, 70),         # C m7b5  Surprise
    8: (60, 64, 67, 71),         # C maj7  Anticipation
}


def _collect_answers(mtc, hits):
    """Approve every assignment of ``hits`` and return the workers' answers.

    Each answer field is converted with ``int``; one list of ints is returned
    per assignment.  Every HIT is disabled after its assignments are handled.
    """
    collected = []
    for hit in hits:
        for assignment in mtc.get_assignments(hit.HITId):
            print("Answers of the worker %s" % assignment.WorkerId)
            answers = []
            for question_form_answer in assignment.answers[0]:
                for value in question_form_answer.fields:
                    answers.append(int(value))
            print("Responses :  %s" % (answers,))
            collected.append(answers)
            mtc.approve_assignment(assignment.AssignmentId)
            print("--------------------")
        mtc.disable_hit(hit.HITId)
    return collected


def _remove_unused_hits(mtc, passes=5, delay=20):
    """Disable every HIT returned by ``get_all_hits``, ``passes`` times over.

    The first sweep runs immediately; each later sweep waits ``delay`` seconds
    first.
    """
    for pass_number in range(1, passes + 1):
        print("Removing unused HITs... Pass #%d of %d" % (pass_number, passes))
        if pass_number > 1:
            sleep(delay)
        for hit in mtc.get_all_hits():
            mtc.disable_hit(hit.HITId)


def _play_midi(music_file):
    """Load ``music_file`` into pygame's mixer and block until playback ends."""
    freq = 44100        # audio CD quality
    bitsize = -16       # 16 bit; a negative size selects signed samples
    channels = 2        # 1 is mono, 2 is stereo
    buffer_size = 2048  # number of samples
    pygame.mixer.init(freq, bitsize, channels, buffer_size)
    # optional volume 0 to 1.0
    pygame.mixer.music.set_volume(1.0)

    pygame.mixer.music.load(music_file)
    print("Music file %s loaded!" % music_file)
    clock = pygame.time.Clock()
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        # check if playback has finished
        clock.tick(30)


def main(argv):
    """Turn worker ratings of tweets into a MIDI track and play it.

    :param argv: argument list; ``argv[0]`` is passed to ``closeTrack`` and
        names the ``.mid`` file, ``argv[1]`` is passed to ``initializeTrack``,
        and the optional ``argv[2]`` is the tweet topic.  With fewer than two
        entries only the usage line is printed.
    :return: None
    """
    if len(argv) < 2:
        print("Usage: tweetbeats.py <song_title> <instrument_number> <optional_topic>")
        return

    # optional topic from the command line
    user_topic = argv[2] if len(argv) > 2 else ""

    # --- Gather Tweets ---
    print("Gathering Tweets...")
    tc = TweetCollector()
    results = tc.CollectTweets(user_topic)
    print("Topic: " + results[0])

    # --- Create Hits ---
    print("Creating HITs...")
    mtur = MTurk(ACCESS_ID, SECRET_KEY, HOST)
    for result in results[1]:
        # keep only printable characters of the tweet text
        mtur.createHit("".join(ch for ch in result if ch in string.printable))

    mtc = MTurkConnection(aws_access_key_id=ACCESS_ID, aws_secret_access_key=SECRET_KEY, host=HOST)

    hits = get_all_reviewable_hits(mtc)
    while len(hits) < MIN_TWEETS:
        print("Not enough hits. Will try again in 10 seconds....")
        sleep(10)
        hits = get_all_reviewable_hits(mtc)

    worker_answers = _collect_answers(mtc, hits)

    # Remove unused HITS; make 5 passes to clean up as best we can
    _remove_unused_hits(mtc)

    # --- Make Hits into Music ---
    initializeTrack(argv[1])
    beat = 1
    for result in worker_answers:
        if len(result) < 2:
            # A response needs a chord answer and a duration answer; the HITs
            # are already approved and disabled here, so skip rather than crash.
            print("Skipping incomplete response: %s" % (result,))
            continue

        duration = _NOTE_DURATIONS.get(result[1], 0)
        shift = random.choice(range(-11, 12))

        pitches = _CHORD_PITCHES.get(result[0])
        if pitches is not None:
            transposed = [p if p == _NO_PITCH else p + shift for p in pitches]
            addChord(beat, duration, 100, *transposed)

        beat += duration
    addChord(beat, 4, 0, 60, 60, 60, 60)  # silence to allow last note to fade out
    closeTrack(argv[0])

    _play_midi(argv[0] + ".mid")
Exemplo n.º 27
0
class amusic(object):
    """Ties together the ``amusic`` MySQL database, an S3 bucket and MTurk.

    Configuration lives in the single-row ``conf`` table created by
    ``initialize`` and is mirrored in ``self.conf``.
    """
    engine = None
    # Built-in defaults; also the values written to the ``conf`` table by
    # initialize().
    origconf = {'minPitch':12,'maxPitch':84,'minStartTime':0.0,'maxStartTime':200.0,
                'minNoteDuration':0.5,'maxNoteDuration':5.0,'minNoteCount':50,'maxNoteCount':400,
                'currentPopulation':'','bucket':None,'hitRewardPerAssignment':0.05,'mturkLayoutID':None,
                'hitTitle':None,'hitDescription':None,'hitKeywords':None}
    # Class-level copy of the defaults, kept for code that reads ``amusic.conf``;
    # every instance gets its own ``self.conf`` in __init__.
    conf = origconf.copy()
    # Column order of the ``conf`` table as declared in initialize(); a row from
    # ``SELECT * FROM conf`` is mapped onto ``self.conf`` in this order.
    _CONF_COLUMNS = ('minPitch', 'maxPitch', 'minStartTime', 'maxStartTime',
                     'minNoteDuration', 'maxNoteDuration', 'minNoteCount',
                     'maxNoteCount', 'currentPopulation', 'bucket',
                     'mturkLayoutID', 'hitTitle', 'hitDescription',
                     'hitKeywords', 'hitRewardPerAssignment')
    s3bucket = None
    mtc = None
    class PopulationNotSet( Exception ): pass
    class SongNotFound( Exception ): pass
    def __init__(self,username,password,ACCESS_ID,SECRET_KEY,initialize=False):
        """Connect to MySQL and MTurk and load the stored configuration, if any.

        :param username: MySQL user name
        :param password: MySQL password
        :param ACCESS_ID: AWS access key id
        :param SECRET_KEY: AWS secret access key
        :param initialize: accepted but not read by this method; call
            ``initialize()`` explicitly to (re)create the database
        """
        self.ACCESS_ID = ACCESS_ID
        self.SECRET_KEY = SECRET_KEY
        # Per-instance configuration: writing into the class-level dict would
        # leak one instance's settings into every other instance.
        self.conf = self.origconf.copy()
        self.engine = sqlalchemy.create_engine('mysql+mysqlconnector://%s:%s@localhost' % (username,password))
        self.engine.connect()
        # The MTurk host is fixed to the sandbox endpoint.
        self.mtc = MTurkConnection(self.ACCESS_ID,self.SECRET_KEY,host='mechanicalturk.sandbox.amazonaws.com')
        try:
            self.engine.execute('USE amusic;')
            row = self.engine.execute('SELECT * FROM conf;').fetchone()
            self._apply_conf_row(row)
        except Exception:
            # Best effort: the database / conf row does not exist until
            # initialize() has been run, in which case the defaults stay.
            pass
    def _apply_conf_row(self, row):
        """Copy one ``conf`` table row into ``self.conf`` by column position."""
        for column, value in zip(self._CONF_COLUMNS, row):
            self.conf[column] = value
    def inits3(self):
        """Create the configured S3 bucket and make it publicly readable."""
        c = boto.connect_s3(self.ACCESS_ID,self.SECRET_KEY)
        self.s3bucket = c.create_bucket(self.conf['bucket'])
        self.s3bucket.set_acl('public-read')
    def initialize(self):
        """Drop and recreate the ``amusic`` database with its default ``conf`` row.

        Creates the ``conf``, ``population``, ``song`` and ``event`` tables.
        """
        try: self.engine.execute('DROP DATABASE IF EXISTS amusic;')
        except Exception: pass  # a failed drop is ignored; CREATE below reports real problems
        self.engine.execute('CREATE DATABASE amusic;')
        self.engine.execute('USE amusic;')
        self.engine.execute('CREATE TABLE conf (minPitch INT,\
                                           maxPitch INT,\
                                           minStartTime FLOAT,\
                                           maxStartTime FLOAT,\
                                           minNoteDuration FLOAT,\
                                           maxNoteDuration FLOAT,\
                                           minNoteCount INT,\
                                           maxNoteCount INT,\
                                           currentPopulation VARCHAR(100),\
                                           bucket VARCHAR(100),\
                                           mturkLayoutID VARCHAR(100),\
                                           hitTitle VARCHAR(100),\
                                           hitDescription VARCHAR(100),\
                                           hitKeywords VARCHAR(100),\
                                           hitRewardPerAssignment FLOAT,\
                                           CONSTRAINT fk_1 FOREIGN KEY (`currentPopulation`) REFERENCES population (title),\
                                           PRIMARY KEY(minPitch)\
                                           ) ENGINE = MYISAM;')
        # The interpolated values are the class constants in ``origconf``, not
        # caller input; ``None`` entries are stored as the text "None".
        self.engine.execute('INSERT INTO conf (minPitch,\
                                          maxPitch,\
                                          minStartTime,\
                                          maxStartTime,\
                                          minNoteDuration,\
                                          maxNoteDuration,\
                                          minNoteCount,\
                                          maxNoteCount,\
                                          currentPopulation,\
                                          bucket,\
                                          hitRewardPerAssignment,\
                                          mturkLayoutID,\
                                          hitTitle,\
                                          hitKeywords,\
                                          hitDescription)\
                                          VALUES(%d,%d,%f,%f,%f,%f,%d,%d,"%s","%s",%f,"%s","%s","%s","%s");'%
                                          (self.origconf['minPitch'],self.origconf['maxPitch'],self.origconf['minStartTime'],
                                           self.origconf['maxStartTime'],self.origconf['minNoteDuration'],
                                           self.origconf['maxNoteDuration'],self.origconf['minNoteCount'],
                                           self.origconf['maxNoteCount'],self.origconf['currentPopulation'],
                                           self.origconf['bucket'], self.origconf['hitRewardPerAssignment'],
                                           self.origconf['mturkLayoutID'],self.origconf['hitTitle'],
                                           self.origconf['hitKeywords'],self.origconf['hitDescription']))
        self.engine.execute('CREATE TABLE population (title VARCHAR(100) NOT NULL,\
                                                 PRIMARY KEY(title)\
                                                 ) ENGINE = MYISAM;')
        self.engine.execute('CREATE TABLE song (id INT NOT NULL AUTO_INCREMENT,\
                                           title VARCHAR(100),\
                                           population VARCHAR(100),\
                                           ppq INT,\
                                           CONSTRAINT fk_1 FOREIGN KEY (`population`) REFERENCES population (title),\
                                           PRIMARY KEY(id,title)\
                                           ) ENGINE = MYISAM;')
        self.engine.execute('CREATE TABLE event (songID INT,\
                                            track INT,\
                                            id INT NOT NULL AUTO_INCREMENT,\
                                            type VARCHAR(5),\
                                            pitch INT,\
                                            value INT,\
                                            startTime FLOAT,\
                                            duration FLOAT,\
                                            velocity INT,\
                                            CONSTRAINT fk_1 FOREIGN KEY (`songID`) REFERENCES song (id),\
                                            PRIMARY KEY(songID,track,id)\
                                            ) ENGINE = MYISAM;')

    def setPopulation(self,title):
        """Store ``title`` as the current population in the database and in ``self.conf``."""
        # Bound parameter instead of string interpolation, so a title that
        # contains quotes cannot break or alter the statement.
        self.engine.execute('UPDATE conf SET currentPopulation=%s;', (title,))
        self.conf['currentPopulation'] = title
    def newPopulation(self,title):
        """Return a ``Population`` for ``title`` built with its default arguments."""
        return Population(self,title)
    def getCurrentPopulation(self):
        """Return the ``Population`` named in ``self.conf``.

        :raises PopulationNotSet: if no current population is configured
        """
        if self.conf['currentPopulation'] == '':
            raise self.PopulationNotSet
        return Population(self,self.conf['currentPopulation'])
    def getPopulation(self,title):
        """Return a ``Population`` for ``title``, constructed with ``create=False``."""
        return Population(self,title,create=False)
    def listHITs(self):
        """Print a header, then one line per HIT returned by ``search_hits``.

        The two song names are taken from the hidden ``song1``/``song2`` inputs
        of the HIT's question; only the last path component is printed.
        """
        print('%s %s %s %s %s' % ("HIT ID".ljust(30), "Status".ljust(22), "Amount", "Song1", "Song2"))
        for hit in self.mtc.search_hits(response_groups=['Request','Minimal','HITDetail','HITQuestion']):
            songs = re.findall('<input type="hidden" value="([^"]+?)" name="song[12]" />',hit.Question)
            print(' '.join((hit.HITId, hit.HITStatus, hit.HITReviewStatus, hit.Amount,
                            songs[0].split('/')[-1], songs[1].split('/')[-1])))
    def deleteHITs(self):
        """Dispose of every HIT returned by ``get_reviewable_hits``."""
        for hit in self.mtc.get_reviewable_hits():
            self.mtc.dispose_hit(hit.HITId)
    def approveAssignment(self,aID):
        """Approve the assignment with id ``aID``."""
        self.mtc.approve_assignment(aID)
    def rejectAssignment(self,aID):
        """Reject the assignment with id ``aID``."""
        self.mtc.reject_assignment(aID)
    def approveAllAssignments(self):
        """Approve every assignment still in the "Submitted" state on reviewable HITs."""
        for hit in self.mtc.get_reviewable_hits():
            for assignment in self.mtc.get_assignments(hit.HITId):
                if assignment.AssignmentStatus=="Submitted":
                    self.mtc.approve_assignment(assignment.AssignmentId)
    def getResults(self):
        """Print a header, then one line per assignment of every reviewable HIT.

        Each answer except the one with qid ``commit`` is reduced to the last
        path component of its first field.
        """
        print('%s %s %s %s %s' % ("Assignment ID".ljust(30), "Worker ID".ljust(14), "Song1", "Song2", "Answer"))
        for hit in self.mtc.get_reviewable_hits():
            for assignment in self.mtc.get_assignments(hit.HITId):
                ans = {}
                for answer in assignment.answers[0]:
                    if answer.qid!='commit': ans[answer.qid] = answer.fields[0].split('/')[-1]
                print('%s %s %s %s %s' % (assignment.AssignmentId, assignment.WorkerId,
                                          ans['song1'], ans['song2'], ans['boxradio']))
Exemplo n.º 28
0
# Stage 1: wait for the first batch of HITs, then gather every distinct answer
# value, grouped under whatever hitsDic maps the HIT id to.
rev_hits = waitUntilHIT1Complete(mtc, hitIds)

possibleAns = defaultdict(Set)

for hit in rev_hits:
    if hit.HITId not in hitIds:
        # not one of the HITs created by this run
        continue
    for assignment in mtc.get_assignments(hit.HITId):
        for question_form_answer in assignment.answers[0]:
            for value in question_form_answer.fields:
                possibleAns[hitsDic[hit.HITId]].add(value)
        mtc.approve_assignment(assignment.AssignmentId)
    mtc.disable_hit(hit.HITId)

print('Creating the second stage HITS')

# Stage 2: one new HIT per (sentence, context) key, built from the answers
# collected above; then wait for that batch as well.
hitIds = Set()
answersDic = {}

for (sentence, context), candidates in possibleAns.iteritems():
    hitId, answers = createHIT2(candidates, sentence, context)
    hitIds.add(hitId)
    hitsDic[hitId] = (sentence, context)
    answersDic[sentence] = answers

rev_hits = waitUntilHIT1Complete(mtc, hitIds)
Exemplo n.º 29
0
class MTurk(object):
    """
    A class that wraps a boto.mturk.connection object and provides methods for
    the most common AI2 use cases
    """
    def __init__(self,
                 aws_access_key_id,
                 aws_secret_access_key,
                 host=SANDBOX_HOST):
        """
        initializes the instance with AWS credentials and a host
        :param aws_access_key_id the access key id.
        :param aws_secret_access_key the secret access key.
        :param host the mturk host to connect to
        """
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=host)
        self.host = host

    def __del__(self):
        """
        close the connection when this object is garbage collected
        """
        # __init__ may have failed before `connection` was assigned
        connection = getattr(self, "connection", None)
        if connection is not None:
            connection.close()

    def get_account_balance(self):
        """
        :return the first element of the connection's account balance result
        """
        return self.connection.get_account_balance()[0]

    def _create_hit(self, params, **kwargs):
        """
        internal helper function for creating a HIT
        :param params the parameters (required and optional) common to all HITs:
               "title", "description", "keywords", "max_assignments", "amount",
               "qualifications" and "lifetime" are required, "annotation" is
               optional
        :param **kwargs any other parameters needed for a specific HIT type
        :return whatever the connection's create_hit returns
        """
        return self.connection.create_hit(
            title=params["title"],
            description=params["description"],
            keywords=params["keywords"],
            max_assignments=params["max_assignments"],
            reward=Price(amount=params["amount"]),
            qualifications=params["qualifications"],
            lifetime=params["lifetime"],
            # optional params below
            annotation=params.get("annotation"),
            **kwargs)

    def create_url_hit(self, params):
        """
        creates a HIT for an external question with a specified URL
        :param params a dict of the HIT parameters. must contain "url" and
               "frame_height" parameters
        :return whatever _create_hit returns
        """
        question = ExternalQuestion(params["url"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_html_hit(self, params):
        """
        creates a HIT for a question with the specified HTML
        :param params a dict of the HIT parameters, must contain "html" and
               "frame_height" parameters
        :return whatever _create_hit returns
        """
        question = HTMLQuestion(params["html"], params["frame_height"])
        return self._create_hit(params, question=question)

    def create_layout_hit(self, params):
        """
        creates a HIT for a question using the supplied layout id
        :param params a dict of the HIT parameters, must contain a "hit_layout"
               parameter with the layout id, and a "layout_params" parameter
               holding the values to feed to the layout, either as a dict or
               as a sequence of (name, value) pairs
        :return whatever _create_hit returns
        """
        raw_layout_params = params["layout_params"]
        # Iterating a dict directly yields only its keys, so a dict has to be
        # expanded to (name, value) pairs first.
        if hasattr(raw_layout_params, "items"):
            pairs = raw_layout_params.items()
        else:
            pairs = raw_layout_params

        layout_params = LayoutParameters([
            LayoutParameter(name, value) for name, value in pairs
        ])

        return self._create_hit(params,
                                hit_layout=params["hit_layout"],
                                layout_params=layout_params)

    def delete_all_hits(self):
        """
        Disables every HIT returned by get_all_hits.
        :return: None
        """
        for hit in self.get_all_hits():
            self.connection.disable_hit(hit.HITId)

    def get_assignments_object_list(self, assignment_dict):
        """
        Flattens a dict of assignment collections into a single list.
        :param assignment_dict a dictionary mapping HITId to an iterable of
               assignment objects (the shape returned by get_assignments)
        :return a list of all the assignment objects
        """
        assignments = []
        for hit_assignments in assignment_dict.values():
            assignments.extend(hit_assignments)
        return assignments

    def get_results_dict(self, HIT_assignments):
        """
        Takes an iterable of HIT assignment objects as input.
        Returns a list with one dictionary per assignment containing:
        assignment_object: the assignment itself
        worker_Id: the worker ID of the Turker who completed the HIT
        HIT_id: the HIT ID
        answers: a dictionary of qid-answer field value pairs (empty when the
                 assignment has no answers)
        """
        assignment_results = []
        for assignment in HIT_assignments:
            answers_dict = {
                answer.qid: answer.fields
                for answer in assignment.answers[0]
            }
            assignment_results.append({
                "assignment_object": assignment,
                "worker_Id": assignment.WorkerId,
                "HIT_id": assignment.HITId,
                # always present, even when there were no answers to loop over
                "answers": answers_dict,
            })
        return assignment_results

    def get_all_results(self, hits):
        """
        Fetches the assignments of the supplied HITs and summarizes them.
        :param hits the HITs to get results for
        :return dict from HITId to the list produced by get_results_dict
        """
        all_results = {}
        for hid, assignments in self.get_assignments(hits).items():
            all_results[hid] = self.get_results_dict(assignments)
        return all_results

    def get_reviewable_hits(self, annotations=None, detailed=False):
        """
        Get all the reviewable HITs. By default returns minimal HIT objects, but
        will return detailed ones (by necessity) if annotations is specified or
        if detailed is True
        :param annotations an optional set of annotations to retrieve HITs for
        :param detailed do you want detailed HIT objects or minimal ones
        :return a list of HIT objects
        """
        minimal_hits = []
        page_num = 1
        # keep requesting pages of 100 until an empty page comes back
        while True:
            more_hits = self.connection.get_reviewable_hits(
                page_size=100, page_number=page_num)
            if not more_hits:
                break
            minimal_hits.extend(more_hits)
            page_num += 1

        if not detailed and annotations is None:
            return minimal_hits

        detailed_hits = [
            self.connection.get_hit(hit.HITId,
                                    response_groups=('Minimal',
                                                     'HITDetail'))
            for hit in minimal_hits
        ]
        return [
            hit for hit in detailed_hits
            if annotation_filter(annotations, hit)
        ]

    def get_all_hits(self, annotations=None):
        """
        Get all the HITs.
        :param annotations a set of annotations to get HITs for, all HITs if
               not specified
        :return a list of HIT objects
        """

        return [
            hit for hit in self.connection.get_all_hits()
            if annotation_filter(annotations, hit)
        ]

    def get_assignments(self, hits=None, hit_ids=None, status=None):
        """
        Retrieves individual assignments associated with the supplied HITs.
        One of `hits` or `hit_ids` must be given; `hit_ids` wins when both are.
        :param hits the HITs to get assignments for
        :param hit_ids the HITIds to get assignments for
        :param status assignment status to filter by
        :return dict from HITId to the connection's get_assignments result
        """
        # NOTE(review): the connection call is made without paging arguments,
        # so presumably only its first page of assignments is returned --
        # confirm against the boto defaults.
        if hit_ids is None:
            hit_ids = [hit.HITId for hit in hits]
        return {
            hit_id: self.connection.get_assignments(hit_id, status=status)
            for hit_id in hit_ids
        }

    def disable_hit(self, hit=None, hit_id=None):
        """
        disable the specified hit (or the hit with the specified id). must
        specify either `hit` or `hit_id`; `hit` wins when both are given
        :param hit a HIT object to disable
        :param hit_id a HITId to disable
        """
        hit_id = hit.HITId if hit is not None else hit_id
        return self.connection.disable_hit(hit_id)

    def approve_assignment(self,
                           assignment=None,
                           assignment_id=None,
                           feedback=None):
        """
        approve the specified assignment (or the assignment with the specified
        id). must specify either `assignment` or `assignment_id`; `assignment`
        wins when both are given
        :param assignment an assignment object to approve
        :param assignment_id an AssignmentId to approve
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.approve_assignment(assignment_id, feedback)

    def reject_assignment(self,
                          assignment=None,
                          assignment_id=None,
                          feedback=None):
        """
        reject the specified assignment (or the assignment with the specified
        id). must specify either `assignment` or `assignment_id`; `assignment`
        wins when both are given
        :param assignment an assignment object to reject
        :param assignment_id an AssignmentId to reject
        :param feedback optional feedback for the worker
        """
        assignment_id = assignment.AssignmentId if assignment is not None else assignment_id
        return self.connection.reject_assignment(assignment_id, feedback)
Exemplo n.º 30
0
class MTurkClient:
    """Thin convenience wrapper around a boto ``MTurkConnection``."""

    # SETUP
    # ===========

    def __init__(self,aws_access_key,aws_secret_key,aws_mode):
        """Connect to MTurk.

        :param aws_access_key: AWS access key id
        :param aws_secret_key: AWS secret access key
        :param aws_mode: ``'sandbox'`` selects the sandbox host; any other
            value selects the production host
        """
        self.mode = aws_mode
        if aws_mode == 'sandbox':
            self.host = 'mechanicalturk.sandbox.amazonaws.com'
        else:
            self.host = 'mechanicalturk.amazonaws.com'

        self.c = MTurkConnection(
            aws_access_key,
            aws_secret_key,
            host=self.host)

    # Defaults for create_hit; entries in its ``extra_settings`` override them.
    default_settings = {
        'lifetime': DAY,
        'duration': 10 * MINUTE,
        'approval_delay': DAY,

        'title': "[title]",
        'description': "[description]",
        'keywords': [],

        'reward': 0.01,
        'max_assignments': 1,

        'height': 700,

        'qualifications': [],
    }

    # HITS
    # ===========
    def create_hit(self,url,extra_settings):
        """Create an external-question HIT that shows ``url``.

        :param url: page displayed to the worker
        :param extra_settings: dict of overrides for ``default_settings``
        :return: ``(HITId, HITTypeId)`` of the created HIT

        Eventually, this should take a TEMPLATE and a dictionary of INPUT data
        that's put into that template. This function would then create an HTML
        file locally (assuming we're running on a web server) by replacing
        template {tags} with input values, and then send the URL to the newly
        created page to MTurk.
        """
        settings = self.default_settings.copy()
        settings.update(extra_settings)

        # convert the plain values into what the connection's create_hit takes
        settings['reward'] = Price(settings['reward'])
        settings['qualifications'] = qualification.Qualifications(settings['qualifications'])
        settings['keywords'] = ','.join(settings['keywords'])
        # the frame height belongs to the question, not to the HIT itself
        height = settings.pop('height')

        # create_hit arguments deliberately left at their defaults:
        #   hit_type        - let Amazon do this automatically
        #   annotation      - optional annotation for our system to use
        #   questions       - multiple HITs at a time; probably irrelevant for External
        #   response_groups - unclear what this does
        hit = self.c.create_hit(question=ExternalQuestion(url,height),**settings)[0]
        return hit.HITId,hit.HITTypeId

    def get_hit(self,hit_id):
        """Return the first element of the connection's ``get_hit`` result."""
        return self.c.get_hit(hit_id)[0]

    def hit_results(self,hit_id,type=None): # type in ['Submitted','Approved','Rejected',None]
        """Return the answers of a HIT's assignments.

        :param hit_id: id of the HIT
        :param type: assignment status to filter by (``'Submitted'``,
            ``'Approved'``, ``'Rejected'``) or ``None`` for all
        :return: dict from AssignmentId to a dict holding each question id's
            first answer field plus ``worker_id``, ``accept_time`` and
            ``submit_time`` (the latter two parsed into ``datetime``)
        """
        time_format = "%Y-%m-%dT%H:%M:%SZ"
        results = {}

        # The status filter was previously ignored (always None).
        assignments = self.c.get_assignments(hit_id, status=type, page_size=100)
        for asst in assignments:
            entry = results.setdefault(asst.AssignmentId,{})
            for qfa in asst.answers[0]:
                entry[qfa.qid] = qfa.fields[0]

            entry['worker_id'] = asst.WorkerId
            entry['accept_time'] = datetime.strptime(asst.AcceptTime,time_format)
            entry['submit_time'] = datetime.strptime(asst.SubmitTime,time_format)

        return results

    # URL of a HIT on MTurk
    def hit_url_turk(self,hit_id):
        """Not implemented; returns None."""
        pass

    def hit_url_external(self,hit_id):
        """Not implemented; returns None."""
        pass

    def extend_hit(self,hit_id,extras):
        """Forward to the connection's ``extend_hit`` with ``extras`` as its second argument."""
        return self.c.extend_hit(hit_id, extras)

    @catcherror
    def delete_hit(self,hit_id):
        """Disable the HIT with id ``hit_id``."""
        self.c.disable_hit(hit_id)

    # Deletes all the HITS on the server. Risky!
    def cleanup(self):
        """Call ``delete_hit`` for every HIT returned by ``get_all_hits``."""
        for hit in self.c.get_all_hits():
            self.delete_hit(hit.HITId)

    # ASSIGNMENTS
    # ===========
    @catcherror
    def approve(self, asst_id, feedback=None):
        """Approve assignment ``asst_id`` with optional ``feedback``."""
        return self.c.approve_assignment(asst_id, feedback)

    @catcherror
    def reject(self, asst_id, feedback=None):
        """Reject assignment ``asst_id`` with optional ``feedback``."""
        return self.c.reject_assignment(asst_id, feedback)

    def block(self,worker_id,feedback=None):
        """Block worker ``worker_id``; ``feedback`` is passed as the reason."""
        return self.c.block_worker(worker_id, feedback)

    def unblock(self,worker_id,feedback=None):
        """Unblock worker ``worker_id``; ``feedback`` is passed as the reason."""
        return self.c.unblock_worker(worker_id, feedback)

    def bonus(self,asst,amount,feedback):
        """Grant a bonus of ``amount`` for an assignment.

        :param asst: object exposing ``worker`` and ``asst_id`` attributes
        :param amount: bonus amount, wrapped in ``Price``
        :param feedback: message passed along with the bonus
        """
        return self.c.grant_bonus(asst.worker, asst.asst_id, Price(amount), feedback)

    # STATUS / DIAGNOSTICS
    # --------------------
    def balance(self):
        """Return the first element of the connection's account balance result."""
        return self.c.get_account_balance()[0]
Exemplo n.º 31
0
    def post(self):
        """Approve the assignments of the notified HIT and queue their answers.

        Reads the ``payload`` request parameter (JSON containing ``HITId``),
        echoes it back as plain text, then for every assignment of that HIT
        approves it and adds a ``/monotranssubmit`` task carrying the
        assignment's answers as JSON.
        """
        payload = self.request.get("payload")

        logging.debug('payload = ' + str(payload))

        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write(str(payload))

        queue = Queue(name="monotranssubmit")

        conn = MTurkConnection(
            aws_access_key_id=settings.settings["aws_access_key_id"],
            aws_secret_access_key=settings.settings["aws_secret_access_key"],
            host=settings.settings["service_url"].replace("https://", ""))

        payload = json.loads(payload)

        time_format = '%Y-%m-%dT%H:%M:%SZ'

        for assgnmnt in conn.get_assignments(hit_id=payload["HITId"]):

            logging.debug('assgnmnt = ' + str(assgnmnt))

            mturk_worker_id = assgnmnt.WorkerId
            mturk_assignment_id = assgnmnt.AssignmentId
            submit_time = datetime.datetime.strptime(assgnmnt.SubmitTime,
                                                     time_format)
            accept_time = datetime.datetime.strptime(assgnmnt.AcceptTime,
                                                     time_format)

            print(mturk_worker_id)
            print("%s %s %s" % (accept_time, submit_time,
                                submit_time - accept_time))

            # first field of every answer, keyed by question id
            results = {}
            for answer in assgnmnt.answers[0]:
                results[answer.qid] = answer.fields[0]

            result = json.dumps(results)
            logging.debug('result = ' + str(result))

            if "data" in results:
                # The parsed value is not used; the call is kept so that a
                # malformed "data" answer still raises before the assignment
                # is approved.
                json.loads(results["data"])

            try:
                conn.approve_assignment(
                    mturk_assignment_id,
                    "Thank you for working on my HITs, have a good day!")

                #TODO: submit data to Monotrans queue
                task = Task(url='/monotranssubmit', params={"payload": result})
                queue.add(task)
            except Exception:
                # Best effort, e.g. the assignment was already approved: carry
                # on with the next assignment, but leave a trace of the failure.
                logging.warning(
                    'could not approve or enqueue assignment %s',
                    mturk_assignment_id, exc_info=True)
Exemplo n.º 32
0
class TranscriptionPipelineHandler():
    def __init__(self):
        """Open the MTurk connection and build the pipeline's collaborators.

        Credentials are read from the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_ACCESS_KEY`` environment variables (a missing variable raises
        ``KeyError``).  The account balance is fetched once here and cached
        on ``self.balance``.
        """
        access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        secret_key = os.environ['AWS_ACCESS_KEY']

        connection = MTurkConnection(
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_key,
            host=HOST)
        self.conn = connection

        # These three handlers are all given the same connection object.
        self.ah = AssignmentHandler(connection)
        self.th = TurkerHandler(connection)
        self.hh = HitHandler(connection, TEMPLATE_DIR)

        self.mh = MongoTranscriptionHandler()
        self.wh = WavHandler()
        self.ph = PromptHandler()
        self.filter = Filter(self.mh)

        # Only the first entry of the balance response is used.
        first_balance = connection.get_account_balance()[0]
        self.balance = first_balance.amount

        logger_name = "transcription_engine.transcription_pipeline_handler"
        self.logger = logging.getLogger(logger_name)

    def audio_clip_referenced_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts_by_state(
                "audio_clips", "Referenced"):
            audio_clip_id = audio_clip["_id"]
            self.mh.queue_clip(audio_clip_id, priority, max_queue_size)
            response = self.audio_clip_queue_to_hit()

    def audio_clip_queued_to_hit(self, priority=1, max_queue_size=10):
        for audio_clip in self.mh.get_artifacts("audio_clips",
                                                {"state": "Queued"}):
            audio_clip_id = audio_clip["_id"]
            response = self.audio_clip_queue_to_hit()
            #===================================================================
            # elif state == "Hit":
            #     print("In hit: %s"%audio_clip_url)
            #===================================================================

    def audio_clip_queue_to_hit(self, cost_sensitive=True):
        """Take queued audio clips from the audio clip queue
            put them in a hit and create the hit.
            If successful, update the audio clip state."""
        clip_queue = self.mh.get_audio_clip_queue()
        clip_pairs = self.mh.get_audio_clip_pairs(clip_queue)
        if clip_pairs:
            hit_title = "Audio Transcription"
            question_title = "List and Transcribe"
            description = "Transcribe the audio clip by typing the words the person says in order."
            keywords = "audio, transcription, audio transcription"
            if cost_sensitive:
                reward_per_clip = 0.02
                max_assignments = 3
                estimated_cost = self.hh.estimate_html_HIT_cost(
                    clip_pairs, reward_per_clip, max_assignments)
                clips_in_hits = self.mh.clips_already_in_hit(clip_pairs)
                if clips_in_hits:
                    #If one or more clips are already in a HIT, remove it from the queue
                    self.mh.remove_audio_clips_from_queue(clips_in_hits)
                elif self.balance - estimated_cost >= 250:
                    #if we have enough money, create the HIT
                    response = self.hh.make_html_transcription_HIT(
                        clip_pairs, hit_title, question_title, description,
                        keywords)
                    self.balance = self.balance - estimated_cost
                    if type(response) == ResultSet and len(
                            response) == 1 and response[0].IsValid:
                        response = response[0]
                        self.mh.remove_audio_clips_from_queue(clip_queue)
                        audio_clip_ids = [
                            w["audio_clip_id"] for w in clip_queue
                        ]
                        hit_id = response.HITId
                        hit_type_id = response.HITTypeId
                        self.mh.create_transcription_hit_artifact(
                            hit_id, hit_type_id, clip_queue, "New")
                        self.logger.info("Successfully created HIT: %s" %
                                         hit_id)
                        return self.mh.update_audio_clips_state(
                            audio_clip_ids, "Hit")
                else:
                    pass
        return False

    def load_assignments_hit_to_submitted(self):
        """Check all assignments for audio clip IDs.
            Update the audio clips.
            This is a non-destructive load of the assignments from MTurk"""
        hits = self.conn.get_all_hits()
        for hit in hits:
            transcription_dicts = [{}]
            hit_id = hit.HITId
            assignments = self.conn.get_assignments(hit_id)
            have_all_assignments = True
            assignment_ids = []
            for assignment in assignments:
                assignment_ids.append(assignment.AssignmentId)
                if self.mh.get_artifact("assignments",
                                        {"_id": assignment.AssignmentId}):
                    #We create assignments here, so if we already have it, skip
                    continue
                else:
                    have_all_assignments = False
                transcription_ids = []
                transcription_dicts = self.ah.get_assignment_submitted_transcriptions(
                    assignment)
                if transcription_dicts and len(transcription_dicts) == 10:
                    pass
                for transcription in transcription_dicts:
                    if not self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"]):
                        self.logger.info("Assignment(%s) with unknown audio clip(%s) skipped"%\
                                    (assignment.AssignmentId,transcription["audio_clip_id"]))
                        break
                    self.mh.update_transcription_state(transcription,
                                                       "Submitted")
                    self.mh.update_audio_clips_state(
                        [transcription["audio_clip_id"]], "Submitted")
                    transcription_ids.append(
                        self.mh.get_artifact(
                            "transcriptions", {
                                "audio_clip_id":
                                transcription["audio_clip_id"],
                                "assignment_id": transcription["assignment_id"]
                            }, "_id"))
                else:
                    self.mh.create_assignment_artifact(assignment,
                                                       transcription_ids,
                                                       "Submitted")
            if assignments and not have_all_assignments:
                self.mh.update_transcription_hit_state(hit_id, "Submitted")
            print("Transcriptions HIT(%s) submitted assignments: %s " %
                  (hit_id, assignment_ids))

    def assignment_submitted_approved(self):
        """For all submitted assignments,
            if an answered question has a reference transcription,
            check the WER.
            If all the answered questions with reference transcriptions
            have an acceptable WER, approve the assignment and update
            the audio clips and transcriptions."""
        assignments = self.mh.get_artifacts_by_state("assignments",
                                                     "Submitted")
        rejected_feedback = "I'm sorry but your work in assignment(%s) was rejected because" +\
                            " one or more of your transcriptions " +\
                            " had a word error rate above the maximum acceptable"+\
                            " word error rate of %s. Omitted words and words that "+\
                            " differed by more than %s "+\
                            " characters were counted as an error."
        accepted_feedback = "Your average word error rate on assignment(%s) was %s."+\
                            " Assignment accepted! Thanks for your hard work."
        for assignment in assignments:
            assignment_id = assignment["_id"]
            transcription_ids = assignment["transcriptions"]
            transcriptions = self.mh.get_artifacts("transcriptions", "_id",
                                                   transcription_ids)

            worker_id = assignment["worker_id"]
            worker_id = self.mh.create_worker_artifact(worker_id)

            approved, average_wer = self.filter.approve_assignment(
                transcriptions)
            if approved:
                try:
                    self.conn.approve_assignment(
                        assignment_id,
                        accepted_feedback % (assignment_id, average_wer))
                except MTurkRequestError as e:
                    print(e)
                else:
                    self.mh.update_assignment_state(assignment, "Approved")
                    for transcription in transcriptions:
                        #Approve transcriptions without references in the same assignment
                        reference_id = self.mh.get_artifact_by_id(
                            "audio_clips", transcription["audio_clip_id"],
                            "reference_transcription_id")
                        if not reference_id:
                            self.mh.update_transcription_state(
                                transcription, "Approved")
                    print("Approved transcription ids: %s" % transcription_ids)
            else:
                #Don't deny for now
                feedback = rejected_feedback % (assignment_id,
                                                self.filter.WER_THRESHOLD,
                                                self.filter.CER_THRESHOLD)
                self.logger.info(feedback)
                self.conn.reject_assignment(assignment_id, feedback)
                self.mh.update_assignment_state(assignment, "Denied")
                #print("Assignments not aproved %s "%denied)
            #Update the worker
            if approved:
                self.mh.add_assignment_to_worker(worker_id,
                                                 (assignment_id, average_wer))

    def _load_rm_audio_source_file_to_clipped(
            self,
            file_dir,
            prompt_file_uri,
            base_clip_dir,
            sample_rate=16000,
            http_base_url="http://www.cis.upenn.edu/~tturpen/wavs/",
            init_clip_count=200):
        """For an audio directory,
            see which files are new and not an audio source already
            """
        prompt_dict = self.ph.get_prompts(prompt_file_uri)
        count = 0
        for root, dirs, files in os.walk(file_dir):
            for f in files:
                if count == init_clip_count:
                    return
                system_uri = os.path.join(root, f)
                out_uri = system_uri.strip(".sph") + ".wav"
                out_uri = os.path.basename(out_uri)
                out_uri = os.path.join(root, (out_uri))
                spkr_id = str(os.path.relpath(root, file_dir))
                #sph to wav
                if not f.endswith(".wav") and not os.path.exists(out_uri):
                    try:
                        self.wh.sph_to_wav(system_uri, out_uri=out_uri)
                    except WavHandlerException as e:
                        self.logger.error("Unable to create wav from sph: " +
                                          str(e))

                if os.path.exists(out_uri) and out_uri.endswith(".wav"):
                    #create audio source artifact
                    count += 1
                    wav_filename = os.path.basename(out_uri)
                    prompt_id = os.path.basename(out_uri).strip(".wav").upper()
                    encoding = ".wav"
                    sample_rate = 16000
                    disk_space = os.stat(out_uri).st_size
                    length_seconds = self.wh.get_audio_length(out_uri)
                    if prompt_id in prompt_dict:
                        transcription_prompt = prompt_dict[prompt_id]
                    else:
                        #No prompt found
                        raise PromptNotFound
                    source_id = self.mh.create_audio_source_artifact(
                        out_uri, disk_space, length_seconds, sample_rate,
                        spkr_id, encoding)
                    #create audio clip artifact
                    audio_clip_uri = os.path.join(base_clip_dir, spkr_id,
                                                  wav_filename)
                    clip_dir = os.path.dirname(audio_clip_uri)
                    if not os.path.exists(clip_dir):
                        os.makedirs(clip_dir)
                    if not os.path.exists(audio_clip_uri):
                        copyfile(out_uri, audio_clip_uri)
                    #http_url
                    http_url = os.path.join(http_base_url, spkr_id,
                                            wav_filename)
                    clip_id = self.mh.create_audio_clip_artifact(
                        source_id, 0, -1, audio_clip_uri, http_url,
                        length_seconds, disk_space)

                    #Update the audio source, updates state too
                    self.mh.update_audio_source_audio_clip(source_id, clip_id)

                    #Create the reference transcription artifact
                    transcription_id = self.mh.create_reference_transcription_artifact(
                        clip_id, transcription_prompt, "Gold")
                    #Completes audio clip to Referenced
                    self.mh.update_audio_clip_reference_transcription(
                        clip_id, transcription_id)

    def all_workers_liveness(self):
        workers = self.mh.get_all_workers()
        for worker in workers:
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments(worker)
            print("Worker(%s) assignments, approved(%s) denied(%s)" %
                  (worker["_id"], approved, denied))
            selection = input(
                "1. Show denied transcriptions and references.\n" +
                "2. Show accepted transcriptions and references.\n" +
                "3. Show both denied and accepted transcriptions.")
            if selection == 1 or selection == 3:
                print("Approved transcriptions")
                for assignment_id in approved:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))
            if selection == 2 or selection == 3:
                print("Denied transcriptions")
                for assignment_id in denied:
                    transcription_pairs = self.mh.get_transcription_pairs(
                        assignment_id)
                    for pair in transcription_pairs:
                        print("Reference:\n\t%s\nHypothesis:\n\t%s\n" %
                              (pair[0], pair[1]))

    def stats(self):
        workers = self.mh.get_all_workers()
        all_wer_per_approved_assignment = 0.0
        total_accepted = 0.0
        for worker in workers:
            worker_wer = 0.0
            worker_id = worker["_id"]
            approved, denied = self.mh.get_worker_assignments_wer(worker)
            for w in approved:
                all_wer_per_approved_assignment += float(w[1])
                worker_wer += float(w[1])
                total_accepted += 1
            if approved:
                worker_average_wer = worker_wer / len(approved)
                print("%s,%s" % (len(approved), worker_average_wer))
            #print("Worker(%s) approved assignments(%s)\n denied assignments(%s)"%(worker_id,approved,denied))
        av = all_wer_per_approved_assignment / total_accepted
        print("Average WER per assignment(%s)" % (av))

    def get_assignment_stats(self):
        self.effective_hourly_wage_for_approved_assignments(.20)

    def effective_hourly_wage_for_approved_assignments(self,
                                                       reward_per_assignment):
        """Calculate the effective hourly wage for Approved Assignments"""
        approved_assignments = self.mh.get_artifacts_by_state(
            "assignments", "Approved")
        total = datetime.timedelta(0)
        count = 0
        for assignment in approved_assignments:
            if "SubmitTime" in assignment:
                accepted = datetime.datetime.strptime(assignment["AcceptTime"],
                                                      "%Y-%m-%dT%H:%M:%SZ")
                submitted = datetime.datetime.strptime(
                    assignment["SubmitTime"], "%Y-%m-%dT%H:%M:%SZ")
            else:
                pass
            total += submitted - accepted
            count += 1
        seconds_per_assignment = total.total_seconds() / count
        effective_hourly_wage = 60.0 * 60.0 / seconds_per_assignment * reward_per_assignment
        print("Effective completion time(%s) *reward(%s) = %s" %
              (seconds_per_assignment, reward_per_assignment,
               effective_hourly_wage))

    def allhits_liveness(self):
        #allassignments = self.conn.get_assignments(hit_id)
        #first = self.ah.get_submitted_transcriptions(hit_id,str(clipid))

        hits = self.conn.get_all_hits()
        for hit in hits:
            hit_id = hit.HITId
            print("HIT ID: %s" % hit_id)
            assignments = self.conn.get_assignments(hit_id)
            if len(assignments) == 0:
                if raw_input("Remove hit with no submitted assignments?(y/n)"
                             ) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                        clips = self.mh.get_artifact("transcription_hits",
                                                     {"_id": hit_id}, "clips")
                        self.mh.remove_transcription_hit(hit_id)
                        self.mh.update_audio_clips_state(clips, "Referenced")
                    except MTurkRequestError as e:
                        raise e
            else:
                if raw_input("Remove hit with %s submitted assignments?(y/n)" %
                             len(assignments)) == "y":
                    try:
                        self.conn.disable_hit(hit_id)
                    except MTurkRequestError as e:
                        raise e

    def run(self):
        audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/ind_trn"
        #audio_file_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/dep_trn"
        prompt_file_uri = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/doc/al_sents.snr"
        base_clip_dir = "/home/taylor/data/corpora/LDC/LDC93S3A/rm_comp/rm1_audio1/rm1/clips"
        selection = 0
        init_clip_count = 10000
        while selection != "11":
            selection = raw_input(
                """Audio Source file to Audio Clip Approved Pipeline:\n
                                     1: AudioSource-FileToClipped: Initialize Resource Management audio source files to %d queueable(Referenced) clips
                                     2: AudioClip-ReferencedToHit: Queue all referenced audio clips and create a HIT if the queue is full.
                                     3: AudioClip-HitToSubmitted: Check all submitted assignments for Transcriptions.
                                     4: AudioClip-SubmittedToApproved: Check all submitted clips against their reference.
                                     5: Review Current Hits
                                     6: Worker liveness
                                     7: Account balance
                                     8: Worker stats
                                     9: Recalculate worker WER                                     
                                     10: Assignment Stats
                                     11: Exit
                                    """ % init_clip_count)
            #selection = "5"
            if selection == "1":
                self._load_rm_audio_source_file_to_clipped(
                    audio_file_dir,
                    prompt_file_uri,
                    base_clip_dir,
                    init_clip_count=init_clip_count)
            elif selection == "2":
                self.audio_clip_referenced_to_hit()
            elif selection == "3":
                self.load_assignments_hit_to_submitted()
            elif selection == "4":
                self.assignment_submitted_approved()
            elif selection == "5":
                self.allhits_liveness()
            elif selection == "6":
                self.all_workers_liveness()
            elif selection == "7":
                print("Account balance: %s" % self.balance)
            elif selection == "8":
                self.stats()
            elif selection == "9":
                self.recalculate_worker_assignment_wer()
            elif selection == "10":
                self.get_assignment_stats()


#     def get_time_submitted_for_assignments(self):
#         assignments = self.mh.get_all_artifacts("assignments")
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             a_assignment = self.conn.get_assignment(assignment_id)[0]
#             self.mh.update_artifact_by_id("assignments", assignment_id, "SubmitTime", a_assignment.SubmitTime)

#     def recalculate_worker_assignment_wer(self):
#         """For all submitted assignments,
#             if an answered question has a reference transcription,
#             check the WER.
#             If all the answered questions with reference transcriptions
#             have an acceptable WER, approve the assignment and update
#             the audio clips and transcriptions."""
#         assignments = self.mh.get_artifacts("assignments",{"state":"Approved"})
#         for assignment in assignments:
#             assignment_id = assignment["_id"]
#             denied = []
#             #If no transcriptions have references then we automatically approve the HIT
#             approved = True
#             transcription_ids = assignment["transcriptions"]
#             transcriptions = self.mh.get_transcriptions("_id",transcription_ids)
#             worker_id = assignment["worker_id"]
#             worker_id = self.mh.create_worker_artifact(worker_id)
#
#             max_rej_wer = (0.0,0.0)
#             total_wer = 0.0
#             for transcription in transcriptions:
#                 #Normalize the transcription
#                 #self.mh.normalize_transcription
#                 reference_id = self.mh.get_audio_clip_by_id(transcription["audio_clip_id"],"reference_transcription_id")
#                 if reference_id:
#                     reference_transcription = self.mh.get_reference_transcription({"_id": reference_id},
#                                                                                   "transcription")
#                     new_transcription = transcription["transcription"].split(" ")
#                     if reference_transcription:
#                         transcription_wer = cer_wer(reference_transcription,new_transcription)
#                         total_wer += transcription_wer
#                         if transcription_wer < WER_THRESHOLD:
#                             self.logger.info("WER for transcription(%s) %d"%(transcription["transcription"],transcription_wer))
#                         else:
#                             max_rej_wer = (transcription_wer,WER_THRESHOLD)
#                             denied.append((reference_transcription,new_transcription))
#                             approved = False
#             average_wer = total_wer/len(transcriptions)
#             #Update the worker
#             self.mh.add_assignment_to_worker(worker_id,(assignment_id,average_wer))
Exemplo n.º 33
0
        hit, = conn.get_hit(args.hit, ['HITDetail', 'HITAssignmentSummary'])
        total = int(hit.MaxAssignments)
        pending = int(hit.NumberOfAssignmentsPending)
        complete = int(hit.NumberOfAssignmentsCompleted)
        available = int(hit.NumberOfAssignmentsAvailable)
        logging.info("max:%s/pending:%s/complete:%s/remain:%s", total, pending,
                     complete, available)

        # check if we have reached the total
        if total >= args.total:
            logging.info("MaxAssignments = %s, exiting", total)
            break

        # compute how many assignments are currently outstanding
        current = available + pending
        if current < args.concurrent:
            diff = min(args.total - total, args.concurrent - current)
            logging.info("Extending HIT with %s more assignments", diff)
            conn.extend_hit(args.hit, assignments_increment=diff)

        # get submitted assignments and approve them
        if args.approve:
            assignments = conn.get_assignments(args.hit,
                                               status="Submitted",
                                               page_size=100)
            for assignment in assignments:
                logging.info("Approving assignment %s",
                             assignment.AssignmentId)
                conn.approve_assignment(assignment.AssignmentId, feedback=None)

        time.sleep(args.interval)
Exemplo n.º 34
0
class MTurkProvider(object):
    """Publish tasks on Amazon Mechanical Turk through an ``MTurkConnection``.

    The provider registers HIT types and HITs, manages qualification types and
    worker scores, and approves or rejects assignments. Most wrappers around
    the connection turn an ``MTurkRequestError`` into a falsy return value
    instead of propagating it.
    """
    # Description passed to MTurk when a HIT type is registered (see create_hits).
    description = 'This is a task authored by a requester on Daemo, a research crowdsourcing platform. ' \
                  'Mechanical Turk workers are welcome to do it'
    # Keywords passed to MTurk when a HIT type is registered (see create_hits).
    keywords = ['daemo']
    # NOTE(review): `countries` and `min_hits` are not referenced by any method
    # of this class; presumably defaults consumed elsewhere - confirm.
    countries = ['US', 'CA']
    min_hits = 1000

    def __init__(self, host, aws_access_key_id, aws_secret_access_key):
        self.host = host
        self.connection = MTurkConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            host=settings.MTURK_HOST
        )
        self.connection.APIVersion = "2014-08-15"
        if not self.host:
            raise ValueError("Please provide a host url")

    def get_connection(self):
        return self.connection

    @staticmethod
    def _mturk_system_qualifications(qualification):
        requirements = []
        for item in qualification.items.all():
            if item.expression['attribute'] not in ['location', 'approval_rate', 'total_tasks']:
                continue
            requirement = None
            if item.expression['attribute'] == 'location':
                op = OP_IN if item.expression['operator'] == 'in' else OP_NOT_IN
                requirement = MultiLocaleRequirement(op, [val.strip() for val in item.expression['value'] if
                                                          val is not None and val != ''])
            elif item.expression['attribute'] == 'approval_rate':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = PercentAssignmentsApprovedRequirement(op, item.expression['value'])
            elif item.expression['attribute'] == 'total_tasks':
                op = OP_GT if item.expression['operator'] == 'gt' else OP_LT
                requirement = NumberHitsApprovedRequirement(op, item.expression['value'])

            requirements.append(requirement)
        return requirements

    def get_qualifications(self, project, boomerang_threshold, add_boomerang):
        """Build the MTurk qualification requirements for a project's HITs.

        The project's own qualification (if any) is translated first. The
        "Boomerang Score" qualification type for the project is then looked up
        or created. Depending on the threshold, either wait-list (deny)
        requirements or a minimum-score requirement is added.

        Args:
            project: project providing ``qualification``, ``owner_id`` and
                ``group_id``.
            boomerang_threshold: integer threshold, compared against
                ``settings.BOOMERANG_MIDPOINT * 100`` and bucket bounds * 100.
            add_boomerang: when false, boomerang qualification types are still
                created/looked up but no boomerang requirement is added.

        Returns:
            tuple: ``(Qualifications, boomerang_qual)`` where ``boomerang_qual``
            is whatever ``create_qualification_type`` returned for the score
            qualification (``None`` when that call failed).
        """
        requirements = []
        if project.qualification is not None:
            requirements += self._mturk_system_qualifications(project.qualification)
        boomerang_qual, success = self.create_qualification_type(owner_id=project.owner_id,
                                                                 project_id=project.group_id,
                                                                 name='Boomerang Score #{}'.format(project.group_id),
                                                                 flag=FLAG_Q_BOOMERANG,
                                                                 description='No description available')
        if boomerang_threshold <= int(settings.BOOMERANG_MIDPOINT * 100):
            # Low thresholds: exclude workers that hold any wait-list
            # qualification whose bucket lies at or below the threshold.
            for i, bucket in enumerate(WAIT_LIST_BUCKETS):
                if int(bucket[1] * 100) <= boomerang_threshold:
                    boomerang_blacklist, blacklist_created = \
                        self.create_qualification_type(owner_id=project.owner_id,
                                                       name='Boomerang Waitlist #{}-{}'.format(project.group_id, len(
                                                           WAIT_LIST_BUCKETS) - i),
                                                       flag=FLAG_Q_BOOMERANG,
                                                       description='No description available',
                                                       deny=True,
                                                       project_id=project.group_id,
                                                       bucket=bucket)
                    if blacklist_created and add_boomerang:
                        requirements.append(
                            BoomerangRequirement(qualification_type_id=boomerang_blacklist.type_id,
                                                 comparator=OP_DNE,
                                                 integer_value=None))
        elif success and add_boomerang:
            # Only touch ``boomerang_qual.type_id`` once creation is known to
            # have succeeded; on failure ``boomerang_qual`` is None.
            requirements.append(
                BoomerangRequirement(qualification_type_id=boomerang_qual.type_id, comparator=OP_GTEQ,
                                     integer_value=boomerang_threshold))
        return Qualifications(requirements), boomerang_qual

    def create_hits(self, project, tasks=None, repetition=None):
        """Create, or re-type, the MTurk HITs backing a project's tasks.

        Args:
            project: project being published; its name, price, timeout,
                deadline, owner and qualification drive the HIT settings.
            tasks: optional rows shaped like the query below, i.e.
                ``(id, repetition, group_id, remaining_assignments, min_rating)``.
                When falsy, the rows are loaded from the database.
            repetition: not used by this method; kept so existing callers
                keep working.

        Returns:
            str: ``'SUCCESS'`` when every task was processed, ``'FAILURE'`` when
            a HIT type could not be obtained, ``'FAILED'`` when MTurk rejected
            the creation of a HIT.
        """
        if not tasks:
            cursor = connection.cursor()
            # noinspection SqlResolve
            query = '''
                SELECT
                  max(id)                   id,
                  repetition,
                  group_id,
                  repetition - sum(existing_assignments) remaining_assignments,
                  min_rating
                FROM (
                       SELECT
                         t_rev.id,
                         t.group_id,
                         t.min_rating,
                         p.repetition,
                         CASE WHEN ma.id IS NULL OR ma.status IN (%(skipped)s, %(rejected)s, %(expired)s)
                           THEN 0
                         ELSE 1 END existing_assignments
                       FROM crowdsourcing_task t
                         INNER JOIN crowdsourcing_project p ON t.project_id = p.id
                         INNER JOIN crowdsourcing_task t_rev ON t_rev.group_id = t.group_id
                         LEFT OUTER JOIN mturk_mturkhit mh ON mh.task_id = t_rev.id
                         LEFT OUTER JOIN mturk_mturkassignment ma ON ma.hit_id = mh.id
                       WHERE t.project_id = (%(project_id)s) AND t_rev.exclude_at IS NULL
                       AND t_rev.deleted_at IS NULL
                ) t
                GROUP BY group_id, repetition, min_rating HAVING sum(existing_assignments) < repetition;
            '''
            cursor.execute(query, {'skipped': TaskWorker.STATUS_SKIPPED,
                                   'rejected': TaskWorker.STATUS_REJECTED,
                                   'expired': TaskWorker.STATUS_EXPIRED,
                                   'project_id': project.id})
            tasks = cursor.fetchall()

        # Boomerang requirements are only attached once at least one
        # requester-originated rating exists.
        rated_workers = Rating.objects.filter(origin_type=Rating.RATING_REQUESTER).count()
        add_boomerang = rated_workers > 0

        # Defaults: 24 hours per assignment, HIT visible for 7 days.
        duration = project.timeout if project.timeout is not None else datetime.timedelta(hours=24)
        lifetime = project.deadline - timezone.now() if project.deadline is not None else datetime.timedelta(
            days=7)

        for task in tasks:
            question = self.create_external_question(task[0])
            mturk_hit = MTurkHIT.objects.filter(task_id=task[0]).first()
            # task[4] is min_rating; the same scaled threshold feeds both the
            # qualification lookup and the HIT type lookup.
            boomerang_threshold = int(round(task[4], 2) * 100)
            qualifications, boomerang_qual = self.get_qualifications(project=project,
                                                                     boomerang_threshold=boomerang_threshold,
                                                                     add_boomerang=add_boomerang)
            qualifications_mask = 0
            if qualifications is not None:
                qualifications_mask = FLAG_Q_LOCALE + FLAG_Q_HITS + FLAG_Q_RATE + FLAG_Q_BOOMERANG
            hit_type, success = self.create_hit_type(title=project.name, description=self.description,
                                                     price=project.price,
                                                     duration=duration, keywords=self.keywords,
                                                     approval_delay=datetime.timedelta(days=2),
                                                     qual_req=qualifications,
                                                     qualifications_mask=qualifications_mask,
                                                     boomerang_threshold=boomerang_threshold,
                                                     owner_id=project.owner_id, boomerang_qual=boomerang_qual)
            if not success:
                return 'FAILURE'

            if mturk_hit is None:
                try:
                    # task[3] is the number of assignments still missing.
                    hit = self.connection.create_hit(hit_type=hit_type.string_id,
                                                     max_assignments=task[3],
                                                     lifetime=lifetime,
                                                     question=question)[0]
                    self.set_notification(hit_type_id=hit.HITTypeId)
                    mturk_hit = MTurkHIT(hit_id=hit.HITId, hit_type=hit_type, task_id=task[0])
                except MTurkRequestError as e:
                    # Guard the lookup so an empty ``errors`` list cannot raise
                    # IndexError from inside the handler and mask the failure.
                    error = e.errors[0][0] if e.errors else None
                    if error == 'AWS.MechanicalTurk.InsufficientFunds':
                        payload = {
                            "type": "ERROR",
                            "detail": "Insufficient funds on your Mechanical Turk account!",
                            "code": error
                        }

                        # Tell the project owner about the funding problem.
                        redis_publisher = RedisPublisher(facility='bot', users=[project.owner])
                        message = RedisMessage(json.dumps(payload))
                        redis_publisher.publish_message(message)
                    return 'FAILED'
            else:
                # Existing HIT: move it to the current HIT type when it differs.
                if mturk_hit.hit_type_id != hit_type.id:
                    result, success = self.change_hit_type_of_hit(hit_id=mturk_hit.hit_id,
                                                                  hit_type_id=hit_type.string_id)
                    if success:
                        mturk_hit.hit_type = hit_type
            mturk_hit.save()
        return 'SUCCESS'

    def create_hit_type(self, owner_id, title, description, price, duration, boomerang_threshold, keywords=None,
                        approval_delay=None, qual_req=None,
                        qualifications_mask=0, boomerang_qual=None):
        """Return a stored HIT type matching the arguments, registering one if needed.

        A stored ``MTurkHITType`` is reused when owner, title, description,
        price, duration, qualifications mask and boomerang threshold all
        match. Otherwise the type is registered on MTurk and saved locally.

        Returns:
            tuple: ``(hit_type, True)`` on success, ``(None, False)`` when MTurk
            rejects the registration.
        """
        decimal_price = Decimal(str(price))
        existing = MTurkHITType.objects.filter(
            owner_id=owner_id,
            name=title,
            description=description,
            price=decimal_price,
            duration=duration,
            qualifications_mask=qualifications_mask,
            boomerang_threshold=boomerang_threshold,
        ).first()
        if existing is not None:
            return existing, True

        reward = Price(price)
        try:
            registered = self.connection.register_hit_type(
                title=title, description=description, reward=reward,
                duration=duration, keywords=keywords,
                approval_delay=approval_delay, qual_req=qual_req)[0]
            created = MTurkHITType(
                owner_id=owner_id, name=title, description=description,
                price=decimal_price, keywords=keywords, duration=duration,
                qualifications_mask=qualifications_mask,
                boomerang_qualification=boomerang_qual,
                boomerang_threshold=boomerang_threshold)
            # Remember the identifier MTurk assigned to the registered type.
            created.string_id = registered.HITTypeId
            created.save()
        except MTurkRequestError:
            return None, False
        return created, True

    def create_external_question(self, task, frame_height=800):
        """Build the ``ExternalQuestion`` pointing at this host's task page.

        Args:
            task: value encoded with Hashids into the ``taskId`` query parameter.
            frame_height: frame height forwarded to ``ExternalQuestion``.

        Returns:
            ExternalQuestion: question whose URL is
            ``<host>/mturk/task/?taskId=<encoded task>``.
        """
        hasher = Hashids(salt=settings.SECRET_KEY, min_length=settings.ID_HASH_MIN_LENGTH)
        encoded_task = hasher.encode(task)
        external_url = ''.join((self.host, '/mturk/task/?taskId=', encoded_task))
        return ExternalQuestion(external_url=external_url, frame_height=frame_height)

    def update_max_assignments(self, task):
        """Grow, expire or revive a task's HIT to match the work still needed.

        Args:
            task: mapping with an ``'id'`` key identifying the ``Task``.

        Returns:
            str: always ``'SUCCESS'``.

        Raises:
            MTurkHIT.DoesNotExist: if the task has no (truthy) ``mturk_hit``.
        """
        task = Task.objects.get(id=task['id'])
        mturk_hit = task.mturk_hit
        if not mturk_hit:
            raise MTurkHIT.DoesNotExist("This task is not associated to any mturk hit")
        # Task workers that were not rejected, skipped or expired count as done.
        assignments_completed = task.task_workers.filter(~Q(status__in=[TaskWorker.STATUS_REJECTED,
                                                                        TaskWorker.STATUS_SKIPPED,
                                                                        TaskWorker.STATUS_EXPIRED])).count()
        remaining_assignments = task.project.repetition - assignments_completed
        if remaining_assignments > 0 and mturk_hit.num_assignments == mturk_hit.mturk_assignments. \
            filter(status=TaskWorker.STATUS_SUBMITTED).count() and \
                mturk_hit.mturk_assignments.filter(status=TaskWorker.STATUS_IN_PROGRESS).count() == 0:
            # Every slot of the HIT is submitted and nobody is working on it,
            # yet more work is needed: open one more slot and extend the HIT.
            self.add_assignments(hit_id=mturk_hit.hit_id, increment=1)
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            mturk_hit.num_assignments += 1
            mturk_hit.save()
        elif remaining_assignments == 0:
            self.expire_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_EXPIRED
            mturk_hit.save()
        elif remaining_assignments > 0 and \
                mturk_hit.status == MTurkHIT.STATUS_EXPIRED:
            self.extend_hit(hit_id=mturk_hit.hit_id)
            mturk_hit.status = MTurkHIT.STATUS_IN_PROGRESS
            # Persist the revived status; without this save the stored HIT
            # would stay EXPIRED even though it was extended on MTurk.
            mturk_hit.save()
        return 'SUCCESS'

    def get_assignment(self, assignment_id):
        try:
            return self.connection.get_assignment(assignment_id)[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.MechanicalTurk.InvalidAssignmentState':
                return assignment_id, False
            return None, False

    def set_notification(self, hit_type_id):
        self.connection.set_rest_notification(hit_type=hit_type_id,
                                              url=self.host + '/api/mturk/notification',
                                              event_types=['AssignmentReturned', 'AssignmentAbandoned',
                                                           'AssignmentAccepted', 'AssignmentSubmitted'])

    def approve_assignment(self, task_worker):
        """Approve the MTurk assignment linked to a task worker, if there is one.

        Args:
            task_worker: mapping with an ``'id'`` key identifying the ``TaskWorker``.

        Returns:
            bool: ``False`` when MTurk rejects the approval; ``True`` otherwise,
            including when the task worker has no MTurk assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        related = getattr(task_worker_obj, 'mturk_assignments', None)
        # Fetch the assignment once: a second ``.first()`` is an extra query
        # and could return None if the row disappeared in between.
        assignment = related.first() if related is not None else None
        if assignment is None:
            return True
        try:
            self.connection.approve_assignment(assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def reject_assignment(self, task_worker):
        """Reject the MTurk assignment linked to a task worker, if there is one.

        Args:
            task_worker: mapping with an ``'id'`` key identifying the ``TaskWorker``.

        Returns:
            bool: ``False`` when MTurk refuses the rejection; ``True`` otherwise,
            including when the task worker has no MTurk assignment.
        """
        task_worker_obj = TaskWorker.objects.get(id=task_worker['id'])
        related = getattr(task_worker_obj, 'mturk_assignments', None)
        # Fetch the assignment once: a second ``.first()`` is an extra query
        # and could return None if the row disappeared in between.
        assignment = related.first() if related is not None else None
        if assignment is None:
            return True
        try:
            self.connection.reject_assignment(assignment.assignment_id)
        except MTurkRequestError:
            return False
        return True

    def expire_hit(self, hit_id):
        try:
            self.connection.expire_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def disable_hit(self, hit_id):
        try:
            self.connection.disable_hit(hit_id)
        except MTurkRequestError:
            return False
        return True

    def extend_hit(self, hit_id):
        try:
            self.connection.extend_hit(hit_id=hit_id, expiration_increment=604800)  # 7 days
        except MTurkRequestError:
            return False
        return True

    def add_assignments(self, hit_id, increment=1):
        try:
            self.connection.extend_hit(hit_id=hit_id, assignments_increment=increment)
        except MTurkRequestError:
            return False
        return True

    def test_connection(self):
        try:
            return self.connection.get_account_balance()[0], True
        except MTurkRequestError as e:
            error = e.errors[0][0]
            if error == 'AWS.NotAuthorized':
                return None, False
            return None, False

    def get_account_balance(self):
        try:
            return self.connection.get_account_balance()[0]
        except MTurkRequestError:
            return None

    def create_qualification_type(self, owner_id, name, flag, description, project_id, auto_granted=False,
                                  auto_granted_value=None, deny=False, bucket=None):
        """Look up or create a qualification type and sync worker scores to it.

        Worker ratings given by ``owner_id`` (origin type
        ``Rating.RATING_REQUESTER``) for the project group ``project_id`` are
        loaded with a raw query. The matching ``MTurkQualification`` is then
        fetched, or created on MTurk and stored locally. Finally each rated
        worker whose username looks like ``mturk.<worker id>`` is assigned the
        qualification on MTurk, unless already recorded locally, and the local
        score row is created or updated.

        Args:
            owner_id: requester whose ratings are used; also part of the lookup key.
            name: qualification name; also part of the lookup key.
            flag: qualification flag; also part of the lookup key.
            description: description sent to MTurk and stored locally.
            project_id: project group id the ratings are restricted to.
            auto_granted: forwarded to MTurk and stored locally.
            auto_granted_value: forwarded to MTurk and stored locally.
            deny: together with ``bucket``, restricts ratings to the bucket's
                range and marks a newly created qualification as a blacklist.
            bucket: ``(lower, upper)`` rating bounds used when ``deny`` is set.

        Returns:
            tuple: ``(qualification, True)`` on success, ``(None, False)`` when
            MTurk refuses to create the qualification type.
        """
        # The inner query ranks each worker's ratings from newest (row_number 0)
        # to oldest by task-worker creation time; the middle query turns them
        # into an average weighted by BOOMERANG_TASK_ALPHA ** row_number.
        # noinspection SqlResolve
        query = '''
            SELECT * FROM (
                SELECT
                  task.target_id,
                  task.username,
                  round(task.task_w_avg::NUMERIC, 2) rating
                  --round(coalesce(task.task_w_avg, requester.requester_w_avg,
                  --  platform.platform_w_avg)::NUMERIC, 2) rating
                FROM (
                               SELECT
                                 target_id,
                                 origin_id,
                                 project_id,
                                 username,
                                 sum(weight * power((%(BOOMERANG_TASK_ALPHA)s), t.row_number))
                                 / sum(power((%(BOOMERANG_TASK_ALPHA)s), t.row_number)) task_w_avg
                               FROM (

                                      SELECT
                                        r.id,
                                        r.origin_id,
                                        p.group_id                              project_id,
                                        weight,
                                        r.target_id,
                                        -1 + row_number()
                                        OVER (PARTITION BY target_id
                                          ORDER BY tw.created_at DESC) AS row_number,
                                          u.username username

                                      FROM crowdsourcing_rating r
                                        INNER JOIN crowdsourcing_task t ON t.id = r.task_id
                                        INNER JOIN crowdsourcing_project p ON p.id = t.project_id
                                        INNER JOIN crowdsourcing_taskworker tw ON t.id = tw.task_id
                                          AND tw.worker_id=r.target_id
                                        INNER JOIN auth_user u ON u.id = r.target_id
                                      WHERE origin_id = (%(origin_id)s) AND origin_type = (%(origin_type)s)) t
                               GROUP BY origin_id, target_id, project_id, username)
                             task WHERE task.project_id = (%(project_id)s)
            ) r
        '''
        # Appended only for deny/bucket qualifications (see below).
        extra_query = 'WHERE rating BETWEEN (%(lower_bound)s) AND (%(upper_bound)s);'
        # NOTE(review): the REQUESTER/PLATFORM alphas are passed but the active
        # query text only references BOOMERANG_TASK_ALPHA.
        params = {
            'origin_type': Rating.RATING_REQUESTER, 'origin_id': owner_id, 'project_id': project_id,
            'BOOMERANG_REQUESTER_ALPHA': settings.BOOMERANG_REQUESTER_ALPHA,
            'BOOMERANG_PLATFORM_ALPHA': settings.BOOMERANG_PLATFORM_ALPHA,
            'BOOMERANG_TASK_ALPHA': settings.BOOMERANG_TASK_ALPHA
        }
        # Extra fields stored on a newly created MTurkQualification; bounds are
        # kept on the x100 scale.
        obj_params = {'upper_bound': 300, 'lower_bound': 100}
        if deny and bucket is not None:
            query += extra_query
            params.update({'upper_bound': bucket[1], 'lower_bound': bucket[0]})
            obj_params.update({'upper_bound': bucket[1] * 100, 'lower_bound': bucket[0] * 100, 'is_blacklist': True})
        cursor = connection.cursor()
        cursor.execute(query, params=params)
        worker_ratings_raw = cursor.fetchall()
        # Row columns are (target_id, username, rating), as selected above.
        worker_ratings = [{"worker_id": r[0], "worker_username": r[1], "rating": r[2]} for
                          r in worker_ratings_raw]

        qualification = MTurkQualification.objects.filter(owner_id=owner_id, flag=flag, name=name).first()
        assigned_workers = []
        if qualification is None:
            # First use: create the type on MTurk, then mirror it locally.
            try:
                qualification_type = self.connection. \
                    create_qualification_type(name=name, description=description,
                                              status='Active',
                                              auto_granted=auto_granted,
                                              auto_granted_value=auto_granted_value)[0]
                qualification = MTurkQualification.objects.create(owner_id=owner_id, flag=flag, name=name,
                                                                  description=description,
                                                                  auto_granted=auto_granted,
                                                                  auto_granted_value=auto_granted_value,
                                                                  type_id=qualification_type.QualificationTypeId,
                                                                  **obj_params)
            except MTurkRequestError:
                return None, False
        else:
            # Workers that already have a local row for this qualification.
            assigned_workers = MTurkWorkerQualification.objects.values('worker').filter(
                qualification=qualification).values_list('worker', flat=True)

        for rating in worker_ratings:
            # Only usernames of the exact form "mturk.<id>" are synced.
            user_name = rating["worker_username"].split('.')
            if len(user_name) == 2 and user_name[0] == 'mturk':
                mturk_worker_id = user_name[1].upper()
                if mturk_worker_id not in assigned_workers:
                    # The result of the MTurk call is not checked here.
                    self.assign_qualification(
                        qualification_type_id=qualification.type_id, worker_id=mturk_worker_id,
                        value=int(rating['rating'] * 100))
                defaults = {
                    'qualification': qualification,
                    'worker': mturk_worker_id,
                    'score': int(rating['rating'] * 100)
                }
                # The local score row is written whether or not MTurk was called.
                MTurkWorkerQualification.objects.update_or_create(qualification=qualification,
                                                                  worker=mturk_worker_id,
                                                                  defaults=defaults)
        return qualification, True

    def change_hit_type_of_hit(self, hit_id, hit_type_id):
        try:
            result = self.connection.change_hit_type_of_hit(hit_id=hit_id, hit_type=hit_type_id)
        except MTurkRequestError:
            return None, False
        return result, True

    def update_worker_boomerang(self, project_id, worker_id, task_avg, requester_avg):
        """Store a worker's boomerang score for a project's qualification.

        The qualification is taken from the HIT type of the first HIT found
        for the project group. An existing worker score is overwritten;
        otherwise a local row is created and the qualification is assigned on
        MTurk. Nothing happens when the project has no HIT.

        Args:
            project_id: project group id used to find a HIT.
            worker_id: MTurk worker whose score is updated.
            task_avg: rating average; stored as ``int(task_avg * 100)``.
            requester_avg: not used by the active code.

        Returns:
            str: always ``'SUCCESS'``.
        """
        hit = MTurkHIT.objects.select_related('hit_type__boomerang_qualification') \
            .filter(task__project__group_id=project_id) \
            .first()
        if hit is None:
            return 'SUCCESS'

        qualification = hit.hit_type.boomerang_qualification
        existing = MTurkWorkerQualification.objects.filter(qualification=qualification,
                                                           worker=worker_id).first()
        scaled_score = int(task_avg * 100)
        if existing is not None:
            self.update_score(existing, score=scaled_score, override=True)
            return 'SUCCESS'

        # No local row yet: record the score first, then grant it on MTurk.
        MTurkWorkerQualification.objects.create(qualification=qualification, worker=worker_id,
                                                score=scaled_score, overwritten=True)
        self.assign_qualification(qualification_type_id=qualification.type_id, worker_id=worker_id,
                                  value=scaled_score)
        return 'SUCCESS'

    def update_score(self, worker_qual, score, override=False):
        if worker_qual is None:
            return False
        try:
            self.connection.update_qualification_score(worker_qual.qualification.type_id, worker_qual.worker, score)
            worker_qual.overwritten = override
            worker_qual.score = score
            worker_qual.save()
        except MTurkRequestError:
            return False
        return True

    def assign_qualification(self, qualification_type_id, worker_id,
                             value=1):
        """
        Revoke a qualification from a WorkerId
        Args:
            qualification_type_id:
            worker_id:
            value

        Returns:
            bool
        """
        try:
            self.connection.assign_qualification(qualification_type_id, worker_id,
                                                 value, send_notification=False)
            return True
        except MTurkRequestError:
            return False

    def revoke_qualification(self, qualification_type_id, worker_id):
        try:
            self.connection.revoke_qualification(qualification_type_id=qualification_type_id, subject_id=worker_id)
            return True
        except MTurkRequestError:
            return False

    def notify_workers(self, worker_ids, subject, message_text):
        try:
            self.connection.notify_workers(worker_ids, subject, message_text)
            return True
        except MTurkRequestError:
            return False
Exemplo n.º 35
0
class amusic(object):
    engine = None
    origconf = {'minPitch':12,'maxPitch':84,'minStartTime':0.0,'maxStartTime':200.0,
                'minNoteDuration':0.5,'maxNoteDuration':5.0,'minNoteCount':50,'maxNoteCount':400,
                'currentPopulation':'','bucket':None,'hitRewardPerAssignment':0.05,'mturkLayoutID':None,
                'hitTitle':None,'hitDescription':None,'hitKeywords':None}
    conf = origconf.copy()
    s3bucket = None
    mtc = None
    class PopulationNotSet( Exception ): pass
    class SongNotFound( Exception ): pass
    def __init__(self,username,password,ACCESS_ID,SECRET_KEY,initialize=False):
        self.ACCESS_ID = ACCESS_ID
        self.SECRET_KEY = SECRET_KEY
        self.engine = sqlalchemy.create_engine('mysql+mysqlconnector://%s:%s@localhost' % (username,password))
        self.engine.connect()
        self.mtc = MTurkConnection(self.ACCESS_ID,self.SECRET_KEY,host='mechanicalturk.sandbox.amazonaws.com')
        try:
            self.engine.execute('USE amusic;')
            c = self.engine.execute('SELECT * FROM conf;').fetchone()
            self.conf['minPitch'] = c[0]
            self.conf['maxPitch'] = c[1]
            self.conf['minStartTime'] = c[2]
            self.conf['maxStartTime'] = c[3]
            self.conf['minNoteDuration'] = c[4]
            self.conf['maxNoteDuration'] = c[5]
            self.conf['minNoteCount'] = c[6]
            self.conf['maxNoteCount'] = c[7]
            self.conf['currentPopulation'] = c[8]
            self.conf['bucket'] = c[9]
            self.conf['mturkLayoutID'] = c[10]
            self.conf['hitTitle'] = c[11]
            self.conf['hitDescription'] = c[12]
            self.conf['hitKeywords'] = c[13]
            self.conf['hitRewardPerAssignment'] = c[14]
        except:pass
    def inits3(self):
        c = boto.connect_s3(self.ACCESS_ID,self.SECRET_KEY)
        self.s3bucket = c.create_bucket(self.conf['bucket'])
        self.s3bucket.set_acl('public-read')
    def initialize(self):
        try: self.engine.execute('DROP DATABASE IF EXISTS amusic;')
        except: pass
        self.engine.execute('CREATE DATABASE amusic;')
        self.engine.execute('USE amusic;')
        self.engine.execute('CREATE TABLE conf (minPitch INT,\
                                           maxPitch INT,\
                                           minStartTime FLOAT,\
                                           maxStartTime FLOAT,\
                                           minNoteDuration FLOAT,\
                                           maxNoteDuration FLOAT,\
                                           minNoteCount INT,\
                                           maxNoteCount INT,\
                                           currentPopulation VARCHAR(100),\
                                           bucket VARCHAR(100),\
                                           mturkLayoutID VARCHAR(100),\
                                           hitTitle VARCHAR(100),\
                                           hitDescription VARCHAR(100),\
                                           hitKeywords VARCHAR(100),\
                                           hitRewardPerAssignment FLOAT,\
                                           CONSTRAINT fk_1 FOREIGN KEY (`currentPopulation`) REFERENCES population (title),\
                                           PRIMARY KEY(minPitch)\
                                           ) ENGINE = MYISAM;')
        self.engine.execute('INSERT INTO conf (minPitch,\
                                          maxPitch,\
                                          minStartTime,\
                                          maxStartTime,\
                                          minNoteDuration,\
                                          maxNoteDuration,\
                                          minNoteCount,\
                                          maxNoteCount,\
                                          currentPopulation,\
                                          bucket,\
                                          hitRewardPerAssignment,\
                                          mturkLayoutID,\
                                          hitTitle,\
                                          hitKeywords,\
                                          hitDescription)\
                                          VALUES(%d,%d,%f,%f,%f,%f,%d,%d,"%s","%s",%f,"%s","%s","%s","%s");'%
                                          (self.origconf['minPitch'],self.origconf['maxPitch'],self.origconf['minStartTime'],
                                           self.origconf['maxStartTime'],self.origconf['minNoteDuration'],
                                           self.origconf['maxNoteDuration'],self.origconf['minNoteCount'],
                                           self.origconf['maxNoteCount'],self.origconf['currentPopulation'],
                                           self.origconf['bucket'], self.origconf['hitRewardPerAssignment'],
                                           self.origconf['mturkLayoutID'],self.origconf['hitTitle'],
                                           self.origconf['hitKeywords'],self.origconf['hitDescription']))
        self.engine.execute('CREATE TABLE population (title VARCHAR(100) NOT NULL,\
                                                 PRIMARY KEY(title)\
                                                 ) ENGINE = MYISAM;')
        self.engine.execute('CREATE TABLE song (id INT NOT NULL AUTO_INCREMENT,\
                                           title VARCHAR(100),\
                                           population VARCHAR(100),\
                                           ppq INT,\
                                           CONSTRAINT fk_1 FOREIGN KEY (`population`) REFERENCES population (title),\
                                           PRIMARY KEY(id,title)\
                                           ) ENGINE = MYISAM;')
        self.engine.execute('CREATE TABLE event (songID INT,\
                                            track INT,\
                                            id INT NOT NULL AUTO_INCREMENT,\
                                            type VARCHAR(5),\
                                            pitch INT,\
                                            value INT,\
                                            startTime FLOAT,\
                                            duration FLOAT,\
                                            velocity INT,\
                                            CONSTRAINT fk_1 FOREIGN KEY (`songID`) REFERENCES song (id),\
                                            PRIMARY KEY(songID,track,id)\
                                            ) ENGINE = MYISAM;')

    def setPopulation(self,title):
        self.engine.execute('UPDATE conf SET currentPopulation="%s";' % title)
        self.conf['currentPopulation'] = title
    def newPopulation(self,title):
        return Population(self,title)
    def getCurrentPopulation(self):
        if self.conf['currentPopulation'] == '':
            raise self.PopulationNotSet
        return Population(self,self.conf['currentPopulation'])
    def getPopulation(self,title):
        return Population(self,title,create=False)
    def listHITs(self):
        print " ".join(('%s','%s','%s','%s','%s')) % ("HIT ID".ljust(30), "Status".ljust(22), "Amount", "Song1", "Song2")
        for i in self.mtc.search_hits(response_groups=['Request','Minimal','HITDetail','HITQuestion']):
            l = re.findall('<input type="hidden" value="([^"]+?)" name="song[12]" />',i.Question)
            print ' '.join((i.HITId, i.HITStatus,i.HITReviewStatus,i.Amount,l[0].split('/')[-1],l[1].split('/')[-1]))
    def deleteHITs(self):
        for i in self.mtc.get_reviewable_hits():
            self.mtc.dispose_hit(i.HITId)
    def approveAssignment(self,aID):
        self.mtc.approve_assignment(aID)
    def rejectAssignment(self,aID):
        self.mtc.reject_assignment(aID)
    def approveAllAssignments(self):
        for i in self.mtc.get_reviewable_hits():
            for assignment in self.mtc.get_assignments(i.HITId):
                if assignment.AssignmentStatus=="Submitted": 
                    self.mtc.approve_assignment(assignment.AssignmentId)
    def getResults(self):
        print " ".join(('%s','%s','%s','%s','%s')) % ("Assignment ID".ljust(30), "Worker ID".ljust(14), "Song1", "Song2", "Answer")
        for i in self.mtc.get_reviewable_hits():
            for assignment in self.mtc.get_assignments(i.HITId):
                ans = {}
                for j in assignment.answers[0]:
                    if j.qid!='commit': ans[j.qid] = j.fields[0].split('/')[-1]
                print assignment.AssignmentId, assignment.WorkerId,ans['song1'], ans['song2'], ans['boxradio']
Exemplo n.º 36
0
# Read in all worker ids that have already been approved (one id per line).
with open('%s' % current_round_file, 'r') as workerids_file:
    already_approved = [line.rstrip('\n') for line in workerids_file.readlines()]

# Set for fast membership tests; already_approved stays a list for any later code.
_approved_lookup = set(already_approved)

# quals.txt is read at most once, and only if a qualification has to be assigned.
_block_qual_loaded = False

# For each assignment, check whether the worker id has already been approved.
# If not, approve the assignment (when auto-approval is on) and append the
# worker id to the round file. The `with` block closes the file even when an
# MTurk call raises, so ids written so far are not lost.
with open('%s' % current_round_file, 'a') as workerids_file:
    for a in assignments:
        if a.WorkerId in _approved_lookup:
            continue

        if auto_approve_yesno.lower() == 'yes':
            mturk_connect.approve_assignment(a.AssignmentId)
        workerids_file.write(a.WorkerId)
        workerids_file.write('\n')

        # If it's the first round and the Block Past Workers option has been
        # selected, assign that qualification to every worker completing the HIT.
        if rounds_so_far == 1 and Block_past_workers_yesno.lower() == 'yes':
            if not _block_qual_loaded:
                with open('quals.txt', 'r') as quals_file:
                    quals_ids = quals_file.readlines()
                # The first line of quals.txt holds the Block Past Workers qualification id.
                Block_past_workers_id = quals_ids[0].rstrip('\n')
                _block_qual_loaded = True
            mturk_connect.assign_qualification(Block_past_workers_id, a.WorkerId, send_notification = False)
Exemplo n.º 37
0
  def post(self):
    """
    Handle a notification for one HIT.

    Reads the "payload" request parameter (JSON text containing "HITId"),
    echoes it back as text/plain, then for every assignment of that HIT:
    collects the answers into a dict, approves the assignment and queues a
    task for /monotranssubmit on the "monotranssubmit" queue carrying the
    answers as JSON.
    """
    payload = self.request.get("payload")

    logging.debug('payload = ' + str(payload))

    self.response.headers['Content-Type'] = 'text/plain'
    self.response.out.write(str(payload))

    queue = Queue(name="monotranssubmit")

    conn = MTurkConnection(aws_access_key_id=settings.settings["aws_access_key_id"],
                           aws_secret_access_key=settings.settings["aws_secret_access_key"],
                           host=settings.settings["service_url"].replace("https://", ""))

    payload = json.loads(payload)

    assignments = conn.get_assignments(hit_id=payload["HITId"])

    # Timestamp layout of the SubmitTime / AcceptTime strings parsed below.
    time_format = '%Y-%m-%dT%H:%M:%SZ'

    for assgnmnt in assignments:

      logging.debug('assgnmnt = ' + str(assgnmnt))

      mturk_worker_id = assgnmnt.WorkerId
      mturk_assignment_id = assgnmnt.AssignmentId
      submit_time = datetime.datetime.strptime(assgnmnt.SubmitTime, time_format)
      accept_time = datetime.datetime.strptime(assgnmnt.AcceptTime, time_format)

      # Single-argument print calls: same output as the former print
      # statements on Python 2, and valid syntax on Python 3 as well.
      print(mturk_worker_id)
      print('%s %s %s' % (accept_time, submit_time, submit_time - accept_time))

      # Question id -> first answer field; a repeated qid keeps its last value.
      results = {}
      for answer in assgnmnt.answers[0]:
        results[answer.qid] = answer.fields[0]

      result = json.dumps(results)
      logging.debug('result = ' + str(result))

      if "data" in results:
        # The parsed value is not used; the call is kept so that a malformed
        # "data" answer still raises here, before anything is approved.
        json.loads(results["data"])

      try:
        conn.approve_assignment(mturk_assignment_id, "Thank you for working on my HITs, have a good day!")

        #TODO: submit data to Monotrans queue
        task = Task(url='/monotranssubmit', params={"payload": result})
        queue.add(task)
      except Exception:
        # Still best effort (e.g. the assignment was already approved), but the
        # failure is now recorded instead of disappearing silently.
        logging.warning('approve/enqueue failed for assignment %s',
                        mturk_assignment_id, exc_info=True)
Exemplo n.º 38
0
                    approve = True

            

           # print '%s\t%s'%(question_form_answer.qid, question_form_answer.fields[0])
    
   

    if approve == False:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f.writelines(question_form_answer.qid.encode('ascii', 'ignore'))
            conn.reject_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)
    else:
        for assignment in assignments:
            for question_form_answer in assignment.answers[0]:
                f1.write(question_form_answer.qid.encode('ascii', 'ignore'))
                f1.write(question_form_answer.fields[0].encode('ascii', 'ignore') + '\n')
            conn.approve_assignment(assignment.AssignmentId)
        conn.disable_hit(hit.HITId)
        

     

    	#Uncomment to approve assignment. Approving will remove this assignment from reviewable HITs, so store the data before approving 

    
    #Uncomment to remove all remaining assignments that have not been completed and approved/rejected