Пример #1
0
 def add_corpus(self, corpus, name, password, options, tlds=None):
     """Insert the initial document for a new corpus, then set up its indexes.

     This is a generator: it first yields the result of ``insert_one`` on
     the "corpus" collection, then the result of
     ``self.init_corpus_indexes(corpus)``. It is presumably driven by a
     coroutine-style decorator applied outside this view -- TODO confirm.

     ``corpus`` becomes the document ``_id``, ``password`` is stored as
     returned by ``salt(password)``, and ``tlds`` is stored as given
     (``None`` when omitted).
     """
     timestamp = now_ts()
     # Resolve the collection before building the document so the calls
     # happen in the same order as a single inline expression would.
     collection = self.db()["corpus"]

     # Identity and settings supplied by the caller.
     document = {
         "_id": corpus,
         "name": name,
         "password": salt(password),
         "options": options
     }

     # Every statistic of a brand new corpus starts at zero.
     zeroed_counters = (
         "total_webentities",
         "webentities_in",
         "webentities_in_untagged",
         "webentities_in_uncrawled",
         "webentities_out",
         "webentities_undecided",
         "webentities_discovered",
         "total_crawls",
         "crawls_pending",
         "crawls_running",
         "total_pages",
         "total_pages_crawled",
         "total_pages_queued",
         "total_links_found",
     )
     for counter in zeroed_counters:
         document[counter] = 0

     # State of the indexing and links loops.
     document["recent_changes"] = False
     document["last_index_loop"] = timestamp
     document["links_duration"] = 1
     document["last_links_loop"] = 0

     # Two independent msgpack-encoded empty mappings wrapped in Binary.
     document["tags"] = Binary(msgpack.packb({}))
     document["webentities_links"] = Binary(msgpack.packb({}))

     document["created_at"] = timestamp
     document["last_activity"] = timestamp
     document["tlds"] = tlds

     yield collection.insert_one(document)
     yield self.init_corpus_indexes(corpus)
Пример #2
0
 def add_corpus(self, corpus, name, password, options, tlds=None):
     """Store the initial document of a new corpus and set up its indexes.

     This is a generator: it first yields the result of
     ``insert(..., safe=True)`` on ``self.db["corpus"]``, then the result
     of ``self.init_corpus_indexes(corpus)``. It is presumably driven by a
     coroutine-style decorator applied outside this view -- TODO confirm.

     ``corpus`` is used as the document ``_id``, ``password`` is stored as
     returned by ``salt(password)``, and ``tlds`` is stored unchanged
     (``None`` when omitted).
     """
     created = now_ts()
     # Look the collection up first, before the document fields are
     # evaluated, as the inline call expression would.
     corpus_collection = self.db["corpus"]

     record = {
         "_id": corpus,
         "name": name,
         "password": salt(password),
         "options": options
     }

     # Counters of a freshly created corpus all start at zero.
     for stat in (
         "total_webentities",
         "webentities_in",
         "webentities_in_untagged",
         "webentities_in_uncrawled",
         "webentities_out",
         "webentities_undecided",
         "webentities_discovered",
         "total_crawls",
         "total_pages",
         "total_pages_crawled",
     ):
         record[stat] = 0

     # Creation and activity share the same timestamp as the index loop.
     record["created_at"] = created
     record["last_activity"] = created
     record["recent_changes"] = False
     record["last_index_loop"] = created
     record["links_duration"] = 1
     record["last_links_loop"] = 0
     record["tlds"] = tlds

     yield corpus_collection.insert(record, safe=True)
     yield self.init_corpus_indexes(corpus)
Пример #3
0
 def add_corpus(self, corpus, name, password, options):
     """Register a new corpus document and set up its indexes.

     This is a generator: it first yields the result of
     ``insert(..., safe=True)`` on ``self.db["corpus"]``, then the result
     of ``self.init_corpus_indexes(corpus)``. It is presumably driven by a
     coroutine-style decorator applied outside this view -- TODO confirm.

     ``corpus`` is used as the document ``_id`` and ``password`` is stored
     as returned by ``salt(password)``.
     """
     stamp = now_ts()
     # Look the collection up first, before the document fields are
     # evaluated, as the inline call expression would.
     target = self.db["corpus"]

     entry = {
         "_id": corpus,
         "name": name,
         "password": salt(password),
         "options": options
     }

     # All statistics start at zero for a new corpus.
     zero_fields = (
         "total_webentities",
         "webentities_in",
         "webentities_out",
         "webentities_undecided",
         "webentities_discovered",
         "total_crawls",
         "total_pages",
         "total_pages_crawled",
     )
     for field in zero_fields:
         entry[field] = 0

     # Every time marker is initialised with the same now_ts() value.
     time_fields = (
         "created_at",
         "last_activity",
         "last_index_loop",
         "last_links_loop",
     )
     for field in time_fields:
         entry[field] = stamp

     yield target.insert(entry, safe=True)
     yield self.init_corpus_indexes(corpus)