'F1_man', 'F2_man', 'F3_man', 'plt_code', 'plt_stress', 'plt_word', 't_man' ]) + '\n' fw = open(outputFile, 'w') fw.write(header) plt_lines = open(pltFile, 'rb').readlines() skipped_lines = [] # skip the first two lines since they contain header information for plt_line in plt_lines[2:]: print(plt_line) plt_line = UnicodeDammit(plt_line, ['utf-8', 'windows-1252']).unicode_markup plt_line = unidecode(plt_line) plt_line = plt_line.rstrip() plt_F1 = plt_line.split(',')[0] # a line beginning with '1' is the first line of the vowel means; this # signals the end of the vowel token measurements, so we can stop # processing the file if plt_F1 == '1': break plt_w_raw = plt_line.split(',')[5].split(' ')[0] plt_w = plt_w_raw.upper() plt_w = plt_w.replace('(', '') plt_w = plt_w.replace(')', '') print(plt_w) if plt_w not in words: skipped_lines.append(plt_line) print("SKIPPING LINE -- WORD NOT FOUND")