"text",
    "time_zone",
    "uid",
    "user.name",
    "utc_offset",
    "verified",
    "trainingLabel",
]

html_parser = html

with open(mturk_labeled_filename, 'rb') as mturk_labeled_file_handle:
    mturk_labeled_data_reader = DictReader(
        mturk_labeled_file_handle, fieldnames=mturk_labeled_data_header, encoding='utf-8')
    # skip first
    mturk_labeled_data_reader.__next__()
    # Dictionary to count flags
    flag_count_on_tweets = {}
    for hit in mturk_labeled_data_reader:
        if hit["AssignmentStatus"] != "Approved":
            continue
        tweet_id = hit['Input.id']
        answer = hit['Answer.Q3Answer']
        if tweet_id not in flag_count_on_tweets:
            flag_count_on_tweets[tweet_id] = 0
        if answer != 'N/A':
            flag_count_on_tweets[tweet_id] += 1
    counter = {0: 0, 1: 0, 2: 0, 3: 0}
    with codecs.open(line_separated_tweets_json_file_name, 'r', 'utf8') as line_separated_tweets_handle:
        with open(aml_training_dataset_filename, 'wb') as aml_training_dataset_handle:
            csv_writer = unicodecsv.writer(