示例#1
0
                    search_terms.append(info['q']['term'])
                    if info['snippet']:
                        texts.append(info['snippet'])

                for text in texts:
                    sentences = sent_tokenize(text)
                    for sentence in sentences:
                        if any([slugify(search_term) in slugify(sentence) for search_term in search_terms]):
                            term_aux = term.replace('_', '.')
                            sentence_aux = sentence.replace('<strong>', '').replace('</strong>', '').replace(',', '')
                            if article_id and pub_date and section_name and web_url and term_category and term_aux and sentence_aux:
                                row_df = DataFrame(
                                    [[article_id, pub_date, section_name, web_url, term_category, term_aux, sentence_aux]],
                                    columns=['article_id', 'pub_date', 'section_name', 'web_url', 'term_category', 'term', 'sentence']
                                )
                                rows_df = rows_df.append(row_df)

            total_rows_df = total_rows_df.append(rows_df)

            if count % percent == 0:
                percentage = count // percent
                print('{} out of {} processed.'.format(count, total))
                print('{}% completed.'.format(percentage))
                print()
                total_rows_df.to_csv('total_rows_{}.csv'.format(percentage), index=False)
            count +=1

        except Exception as e:
            print('Error:', e)
            print('Document:', doc)