Exemplo n.º 1
def make_usernames_csv_for_turk():
    """Dump the usernames awaiting Mechanical Turk evaluation to a csv file.

    Asks crosssite_username_dataset_mgr for the usernames to evaluate on the
    Twitter site, prints how many there are followed by the collection
    itself, and writes them one username per row to
    __usernames_to_judge_csv_path__ through csv_util.write_to_spreadsheet
    (third argument None).
    """
    print("Creating csv file of usernames for evaluation on Mechanical Turk...")
    site = short_text_websites.get_twitter_site()
    pending = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(site)
    pending_count = str(len(pending))
    print(pending_count+" unconfirmed usernames available:")
    print(pending)
    # one single-cell row per username
    single_cell_rows = []
    for name in pending:
        single_cell_rows.append([name])
    csv_util.write_to_spreadsheet(__usernames_to_judge_csv_path__, single_cell_rows, None)
Exemplo n.º 2
def make_tweet_entities_csv_for_turk():
    """Write entity/candidate judgment tasks for Mechanical Turk to a csv file.

    Steps:
      1. Read the results spreadsheet and collect the (entity id, candidate
         link) pairs that already have a judgment.
      2. Walk the ambiguous entities in random order, keeping at most one
         entity per short text and at most 50 entities per user, and keep
         only the candidate links that have not been judged yet.
      3. For users that reached the full 50 entities, emit one row per
         candidate link: [entity_id, short_text, ambiguous_entity,
         candidate_link]. Stop adding users once more than 1400 rows exist.

    Returns None. Prints a message and returns early when the entity
    manager has nothing cached.

    NOTE(review): the previous text of this function had body-less `if`
    statements and accumulators that were never filled. The loop exits and
    appends below were reinstated from the surrounding comments; please
    confirm them against the project's history.
    """
    entities_per_user = 50
    task_max = 1400

    twitter_site = short_text_websites.get_twitter_site()
    entities_to_evaluate = entity_dataset_mgr.get_valid_ne_candidates(twitter_site)
    if entities_to_evaluate is None:
        print("No ambiguous entities + candidates in cache. Run run_all_dataset_generators "
              "script and choose to first fetch and store more entities from short texts.")
        # Nothing to build tasks from; random.shuffle(None) below would raise.
        return

    # (entity id, candidate link) pairs already judged. A set keeps the
    # membership tests in the selection loop cheap.
    already_judged = set()
    entity_id_col = None
    candidate_link_col = None
    judged_row_plus_headers = csv_util.query_csv_for_rows(__entities_results_csv_path__, False)
    for judge_row in judged_row_plus_headers:
        try:
            if entity_id_col is None:
                # The first row that parses is the header; both columns must
                # resolve before either index is committed.
                id_col = judge_row.index('Input.entity_id')
                candidate_link_col = judge_row.index('Input.candidate_link')
                entity_id_col = id_col
            else:
                already_judged.add((judge_row[entity_id_col],
                                    judge_row[candidate_link_col]))
        except (ValueError, IndexError):
            continue # just ignore a problematic row

    # Determine what entity+candidate tasks we actually want to write to a spreadsheet
    # and send to mturk since we don't have resources for unlimited mturk tasks
    tasks = {} # entity id -> candidate links we still want judged
    user_entities = defaultdict(list) # username -> [NamedEntity obj]
    done_shorttexts = set() # shorttext ids that already contributed an entity
    random.shuffle(entities_to_evaluate) # so we get a random subset of a user's entities
    for ne_obj in entities_to_evaluate:
        # "40 nouns usually enough to establish statistically significant
        # differences between WSD algorithms" (Santamaria et al., 2010)
        username = ne_obj.username
        # `>=` rather than `>` so a user ends up with exactly the quota,
        # matching the "should be 50 NamedEntity objects per user" intent.
        if len(user_entities[username]) >= entities_per_user:
            continue # have enough entities for this user

        # limiting our dataset to one named entity per short text
        shorttext_id = ne_obj.shorttext_id
        if shorttext_id in done_shorttexts:
            continue

        entity_id = ne_obj.get_entity_id()
        candidate_URLs = ne_obj.get_candidate_wikiURLs()
        if len(candidate_URLs) < 2:
            # this would be a non-ambiguous entity, and we should never reach this
            # point because such entities should have been filtered out by now
            continue

        # no need to create tasks for candidates we already have annotator judgments for
        valid_candidate_tasks = [candidate_URL for candidate_URL in candidate_URLs
                                 if (entity_id, candidate_URL) not in already_judged]
        if not valid_candidate_tasks:
            continue # already have annotator judgments for all of this entity's candidates

        tasks[entity_id] = valid_candidate_tasks
        user_entities[username].append(ne_obj)
        done_shorttexts.add(shorttext_id)

    # put valid entities + candidates in the spreadsheet until reach our limit of tasks
    rows = []
    # NOTE(review): `headers` is not passed to write_to_spreadsheet in the
    # code this was repaired from; confirm whether the csv needs a header row.
    headers = ['entity_id', 'short_text', 'ambiguous_entity', 'candidate_link']
    for username in user_entities:
        # add users until reach our limit on the number of tasks we can afford,
        # but break at this point in the loop rather than in the inner loop to
        # ensure that we do have at least 50 entities per user (even if this
        # means we go over our task limit a little in order to reach that amount)
        if len(rows) > task_max:
            break
        # bypass users who haven't written the minimum number of valid entities
        # required to establish statistical significance between the algorithms
        if len(user_entities[username]) < entities_per_user:
            continue

        # should be 50 NamedEntity objects per user, and we'll make tasks for their candidates
        for ne_obj in user_entities[username]:
            entity_id = ne_obj.get_entity_id()
            # make sure the entity presented to a Turker looks the same as
            # it appears in the short text (ie with the same capitalization)
            # (assumes shorttext_str is a Python 2 byte string -- TODO confirm)
            original_shorttext = ne_obj.shorttext_str.decode('latin-1')
            surface_form = ne_obj.surface_form
            if surface_form not in original_shorttext:
                surface_form = __match_appearance__(surface_form, original_shorttext)

            # shuffle a copy of the candidates so that they don't appear
            # in wikiminer's/dbpedia's ranking order and bias the turker
            choices = tasks[entity_id][:]
            random.shuffle(choices)
            for choice in choices:
                # make a separate row for each candidate link
                # rather than putting all links in a single cell
                rows.append([entity_id, original_shorttext, surface_form, choice])

            if len(rows) % 50 == 0:
                # write the rows every once in a while in case we reach an error
                print("Updating spreadsheet..."+str(len(rows)))
                csv_util.write_to_spreadsheet(__entities_to_judge_csv_path__, rows)

    # dump to csv
    csv_util.write_to_spreadsheet(__entities_to_judge_csv_path__, rows)
Exemplo n.º 3
def prompt_for_site():
    """Ask on the console which site to work with and return its identifier.

    The answer is matched after stripping surrounding whitespace and
    upper-casing it, so "a", " A " and "A" are all accepted. Anything else
    (including an empty line) causes the question to be asked again instead
    of failing with a KeyError on the lookup.

    Returns:
        short_text_websites.get_twitter_site() for choice A, or the string
        'SITE_FLICKR' for choice B.
    """
    valid_choices = ('A', 'B')
    site_input = None
    while site_input not in valid_choices:
        site_input = raw_input('Work with (A) Twitter or (B) Flickr? (Enter A or B): ')
        site_input = site_input.strip().upper()
    # Built after the prompt, as before, so the Twitter lookup still happens
    # once per call and only after the user has answered.
    sites = {'A': short_text_websites.get_twitter_site(), 'B': 'SITE_FLICKR'}
    return sites[site_input]
Exemplo n.º 4
def update_usernames_csv_with_judgments():
    ''' Handles updating the cross-site-usernames 
    spreadsheet given new Mechanical Turker evaluations '''
    # NOTE(review): this copy of the function looks incomplete -- several
    # `if`/`elif` headers below have no body, one condition is cut off
    # mid-expression, and the over-indented row loop ending in `continue`
    # suggests a try/except wrapper was stripped. It will not parse as shown;
    # recover the missing statements from the original source before running.
    print "Updating cross-site-usernames spreadsheet with Mechanical Turker "+\
    "judgments about username matching from spreadsheet of results..."
    twitter_site = short_text_websites.get_twitter_site()
    # Usernames that were handed out for evaluation; every result row is
    # checked against this collection in the loop below.
    evaluated_usernames = crosssite_username_dataset_mgr.get_usernames_to_evaluate_mturk(twitter_site)
    # judged username -> Turker_Username_Evaluation, filled in by the row loop
    judgments = {}
    row_num = 0
    # The meaning of the second argument (False) is not visible here --
    # TODO confirm against csv_util.query_csv_for_rows.
    rows_plus_headers = csv_util.query_csv_for_rows(__usernames_results_csv_path__, False)
    for row in rows_plus_headers:
            # NOTE(review): presumably a `try:` belongs at this level, paired
            # with an `except` before the `continue` at the end of the loop.
            if row_num==0: # row 0 is header
                # Locate the columns of interest by their header names.
                username_col = row.index('Input.user')
                turkerID_col = row.index('WorkerId')
                answer_col = row.index('Answer.Q1')
                # NOTE(review): everything from here to the row_num increment
                # reads like per-row handling, yet as shown it sits under the
                # header check; an `else:` looks to be missing.
                judged_username = row[username_col]
                if not judged_username in evaluated_usernames:
                    # NOTE(review): bare `raise` with no exception being
                    # handled; if the loop body really is wrapped in a
                    # catch-all except, this would be swallowed -- confirm.
                    raise # this shouldn't happen so see what's going on..
                # NOTE(review): as shown, the lookup below is immediately
                # overwritten by a fresh Turker_Username_Evaluation; an
                # `else:` between the two assignments looks to be missing.
                if judged_username in judgments:
                    evaluations = judgments[judged_username]
                    evaluations = Turker_Username_Evaluation(judged_username)
                    judgments[judged_username] = evaluations
                workerID = row[turkerID_col]
                judgment = row[answer_col]
                # NOTE(review): both branches are empty here; presumably they
                # record this worker's answer on `evaluations` -- restore the
                # calls from the original source.
                if 'True'==judgment:
                elif 'False'==judgment:
            row_num = row_num+1    
            continue # just ignore a problematic assignment row
    # usernames that meet a threshold indicating that enough
    # of a majority of turkers agreed this is the same individual
    likely_usernames = []
    # NOTE(review): never filled in the code shown; presumably populated by
    # the empty `elif` branch in the loop below -- confirm.
    conflicting_judgments = []
    # usernames that all workers agreed belonged to a single 
    # person (ie ignore usernames that were rejected by any worker)
    unanimous_confirmed_usernames = []
    # Also get usernames that all workers unanimously
    # agreed do NOT belong to a single individual
    unanimous_nonmatch_usernames = []
    for username in judgments:
        # NOTE(review): body missing; what happens to manually overridden
        # usernames cannot be told from here.
        if username in __MANUAL_OVERRIDES_TRUE__:
        evaluation = judgments[username]
        eval_score = evaluation.get_eval_measure()
        print "Match score for "+str(username)+": "+str(eval_score)
        # NOTE(review): both branches below are empty; the list names declared
        # above suggest appends to likely_usernames / conflicting_judgments,
        # but that is not shown -- confirm.
        if eval_score>=__JUDGMENT_THRESHOLD__:
        elif evaluation.get_number_true_evals()>0:
        # Each username given to 5 turkers to evaluate. 
        # NOTE(review): the condition below is cut off -- its closing line
        # and the body of the `if` are missing.
        if (evaluation.get_number_true_evals()>0 and 
            evaluation.get_number_false_evals()==0 and 
            # all turkers unanimously confirmed this username
        # Only false votes, no true or unknown votes, and not listed
        # (lower-cased) in __MANUAL_OVERRIDES_FALSE_UNCLEAR__.
        # NOTE(review): body missing here as well.
        if (evaluation.get_number_false_evals()>0 and
            evaluation.get_number_true_evals()==0 and 
            evaluation.get_number_unknown_evals()==0 and
            not username.lower() in __MANUAL_OVERRIDES_FALSE_UNCLEAR__):
    print "Judged "+str(len(judgments))+" usernames"
    print "Likely matches"+str(len(likely_usernames))
    print "Conflicting judgments:"+str(conflicting_judgments) 
    # Update the judgment cell in the spreadsheet for the likely matches.
    # Note this passes likely_usernames (the threshold-based list), not
    # unanimous_confirmed_usernames.
    crosssite_username_dataset_mgr.update_confirmed_positive_usernames(twitter_site, likely_usernames)
    print "Updated cross-site-usernames spreadsheet to reflect majority positive confirmations"
    crosssite_username_dataset_mgr.update_confirmed_negative_usernames(twitter_site, unanimous_nonmatch_usernames)
    print "Updated cross-site-usernames spreadsheet to reflect unanimous negative confirmations"