def main(crowdflower_csv, pos_data_dir, output_file, debug):
    """
    Transform the crowdflower results into training data

    :param file crowdflower_csv: The CSV containing crowdflower data
    :param str pos_data_dir: The directory containing POS tagging for each sentence
    :param file output_file: The file in which to write the training data
    :param debug: If truthy, print the results as indented JSON after each
        processing step; the flag is also passed on to produce_training_data
    :return: 0
    :rtype: int
    """
    def dump_if_debug(title):
        # Print a heading followed by the current state of the results,
        # but only when debugging was requested.
        if debug:
            print(title)
            print(json.dumps(results, indent=2))

    results = read_full_results(crowdflower_csv)
    dump_if_debug('Results from crowdflower')

    # The return values of the next two calls are ignored; presumably they
    # update `results` in place -- confirm against their definitions.
    set_majority_vote_answer(results)
    dump_if_debug('Computed majority vote')

    tag_entities(results)
    dump_if_debug('Entities tagged')

    training_lines = produce_training_data(results, pos_data_dir, debug)
    for line in training_lines:
        # Each item is UTF-8 encoded and written on a line of its own.
        output_file.write(line.encode('utf-8') + '\n')

    return 0
def main(crowdflower_output, num_judgments):
    """
    Compute the agreement of the judgments given in the crowdflower job,
    using a metric called Fleiss kappa, and print the result.

    :param crowdflower_output: The crowdflower results, handed to read_full_results
    :param num_judgments: Number of judgments, handed to compute_matrix
    :return: None; the value is only printed
    """
    judgments = read_full_results(crowdflower_output)
    agreement_matrix = compute_matrix(judgments, num_judgments)
    kappa = computeFleissKappa(agreement_matrix)
    print(kappa)
def main(crowdflower_output, num_judgments):
    """
    Print the agreement of the judgments given in the crowdflower job,
    measured with a metric called Fleiss kappa.

    :param file crowdflower_output: CSV file containing the results from crowdflower
    :param int num_judgments: Consider only this number of results, skip if not enough
    """
    # Load the job results, then arrange them into the matrix the kappa
    # computation takes as input.
    parsed_results = read_full_results(crowdflower_output)
    rating_matrix = compute_matrix(parsed_results, num_judgments)

    fleiss_kappa = computeFleissKappa(rating_matrix)
    print(fleiss_kappa)
def main(crowdflower_csv, pos_data_dir, output_file, debug):
    """
    Read crowdflower results, post-process them and write training data.

    The results are loaded with read_full_results, passed through
    set_majority_vote_answer and tag_entities, and finally turned into
    output items by produce_training_data.

    :param crowdflower_csv: Input handed to read_full_results
    :param pos_data_dir: Handed to produce_training_data
    :param output_file: Object with a ``write`` method receiving the output
    :param debug: If truthy, print the results as indented JSON after each
        step; also handed to produce_training_data
    :return: 0
    """
    def show(stage, payload):
        # Debug-only dump: a heading line followed by the JSON payload.
        if not debug:
            return
        print(stage)
        print(json.dumps(payload, indent=2))

    results = read_full_results(crowdflower_csv)
    show('Results from crowdflower', results)

    # Return value ignored -- presumably mutates `results`; confirm.
    set_majority_vote_answer(results)
    show('Computed majority vote', results)

    # Return value ignored -- presumably mutates `results`; confirm.
    tag_entities(results)
    show('Entities tagged', results)

    for entry in produce_training_data(results, pos_data_dir, debug):
        encoded = entry.encode('utf-8')
        output_file.write(encoded + '\n')

    return 0
def main(crowdflower_output, num_judgments):
    """
    Print the value computeFleissKappa returns for the crowdflower results.

    :param crowdflower_output: Input handed to read_full_results
    :param num_judgments: Handed to compute_matrix together with the results
    :return: None; the computed value is only printed
    """
    crowdflower_results = read_full_results(crowdflower_output)
    matrix = compute_matrix(crowdflower_results, num_judgments)
    score = computeFleissKappa(matrix)
    print(score)