# Example 1
# 0
def content_scraper(table):
    """Spawn one worker thread per document that lacks a 'content' field.

    Iterates the documents in *table*, skips any that already contain a
    'content' key, and starts a ``content_adder_thread`` for each remaining
    one. A randomized delay between launches throttles the scrape rate.

    Args:
        table: database collection handle understood by ``table_to_list``
            and ``content_adder_thread``.
    """
    docs = table_to_list(table)
    for i, doc in enumerate(docs):
        # Already scraped; skip.
        if 'content' in doc:
            continue
        # name must be a string: threading.Thread uses a truthiness check,
        # so name=0 (the first index) would silently get an auto name.
        thread = threading.Thread(
            name=str(i),
            target=content_adder_thread,
            args=(table, doc, i),
        )
        thread.start()
        # Randomized politeness delay (~0.3-0.63s) between launches.
        time.sleep(np.random.random() / 3 + 0.3)
# Example 2
# 0
    def __init__(self):
        """Set up the NYT article-search client.

        Opens the 'nyt' collection, records the URLs of articles already
        stored (to skip duplicates later), and prepares the API endpoint
        and request payload.
        """
        self.i = 0
        self.table = st.open_database_collection('nyt')

        # URLs already present in the collection.
        existing = st.table_to_list(self.table)
        self.seen_urls = {record['web_url'] for record in existing}

        self.link = 'http://api.nytimes.com/svc/search/v2/articlesearch.json'
        # API key comes from the environment; KeyError here means it is unset.
        self.payload = {'api-key': os.environ['NYT_API_KEY']}
        self._set_filters()
def remove_dups(table):
    """Delete duplicate documents from *table*, keeping one per link.

    Builds a link -> _id mapping (later documents win), then deletes every
    document whose _id is not the kept one for its link. NYT API documents
    store the URL under 'web_url'; those are normalized onto 'link' first.

    Args:
        table: database collection handle supporting ``delete_one`` and
            understood by ``st.table_to_list``.
    """
    docs = st.table_to_list(table)
    if not docs:
        # Empty collection: the 'web_url' probe below would IndexError.
        return

    # NYT documents use 'web_url' instead of 'link'; normalize in place.
    if 'web_url' in docs[0]:
        for doc in docs:
            doc['link'] = doc['web_url']

    # For each link, the last-seen _id survives (dict keeps the last value).
    survivors = {doc['link']: doc['_id'] for doc in docs}
    keep_ids = set(survivors.values())
    all_ids = {doc['_id'] for doc in docs}

    for _id in all_ids - keep_ids:
        table.delete_one(filter={'_id': _id})