"text", "time_zone", "uid", "user.name", "utc_offset", "verified", "trainingLabel", ] html_parser = html with open(mturk_labeled_filename, 'rb') as mturk_labeled_file_handle: mturk_labeled_data_reader = DictReader( mturk_labeled_file_handle, fieldnames=mturk_labeled_data_header, encoding='utf-8') # skip first mturk_labeled_data_reader.__next__() # Dictionary to count flags flag_count_on_tweets = {} for hit in mturk_labeled_data_reader: if hit["AssignmentStatus"] != "Approved": continue tweet_id = hit['Input.id'] answer = hit['Answer.Q3Answer'] if tweet_id not in flag_count_on_tweets: flag_count_on_tweets[tweet_id] = 0 if answer != 'N/A': flag_count_on_tweets[tweet_id] += 1 counter = {0: 0, 1: 0, 2: 0, 3: 0} with codecs.open(line_separated_tweets_json_file_name, 'r', 'utf8') as line_separated_tweets_handle: with open(aml_training_dataset_filename, 'wb') as aml_training_dataset_handle: csv_writer = unicodecsv.writer(