sentence_aux = sentence.replace('<strong>', '').replace('</strong>', '').replace(',', '') if article_id and pub_date and section_name and web_url and term_category and term_aux and sentence_aux: row_df = DataFrame( [[article_id, pub_date, section_name, web_url, term_category, term_aux, sentence_aux]], columns=['article_id', 'pub_date', 'section_name', 'web_url', 'term_category', 'term', 'sentence'] ) rows_df = rows_df.append(row_df) total_rows_df = total_rows_df.append(rows_df) if count % percent == 0: percentage = count // percent print('{} out of {} processed.'.format(count, total)) print('{}% completed.'.format(percentage)) print() total_rows_df.to_csv('total_rows_{}.csv'.format(percentage), index=False) count +=1 except Exception as e: print('Error:', e) print('Document:', doc) print('{} out of {} processed.'.format(total, total)) print('100% completed.') total_rows_df.to_csv('total_rows_100.csv', index=False) except Exception as e: print('Error:', e) total_rows_df.to_csv('total_rows.csv', index=False)