def index_by(index_dir: str, index_extension: str, data_iter: iter, key_fn: Callable, value_fn: Callable,
             checkpoint: int, object_name: str) -> dict:
    """
    Generate UnQlite data indices for each entity

    One index file is opened per distinct ``data['@type']`` value met in
    ``data_iter``; every record is stored in the index of its entity under
    ``str(key_fn(data))`` with the value ``value_fn(data)``.

    :param index_dir index directory
    :param index_extension index file extension
    :param data_iter iterable on data (each item must expose an '@type' key)
    :param key_fn function to use on data to get the index key
    :param value_fn function to use on data to get the index value
    :param checkpoint commit index every checkpoints
    :param object_name label appended to the progress messages
    :return dict of index paths by entity name
    """
    index_path_by_entity = {}
    index_by_entity = {}

    try:
        for i, data in enumerate(data_iter, start=1):
            entity = data['@type']

            # Lazily open one index per entity type, inside a transaction
            if entity not in index_by_entity:
                index_path = get_file_path([index_dir, entity], ext=index_extension)
                index_path_by_entity[entity] = index_path
                index = UnQLite(index_path)
                # Register before begin() so the handle is closed by the
                # finally clause even if starting the transaction fails
                index_by_entity[entity] = index
                index.begin()

            # Index
            index_by_entity[entity][str(key_fn(data))] = value_fn(data)

            # Log
            if i % 50000 == 0:
                print(f'checkpoint: {i} {object_name}')

            # Checkpoint
            if i % checkpoint == 0:
                # Flush indices, then reopen a transaction on each of them
                for index in index_by_entity.values():
                    index.commit()
                    index.begin()
                print(f'checkpoint: {i} {object_name}')

        # Commit whatever was written since the last checkpoint
        for index in index_by_entity.values():
            index.commit()
    finally:
        # Close indices, also when indexing failed part-way, so that no
        # database handle stays open behind the caller's back.
        # NOTE(review): on failure the pending transaction is neither committed
        # nor rolled back explicitly here; what close() does with it is up to
        # UnQLite - confirm if partial writes matter.
        for index in index_by_entity.values():
            index.close()

    # Output all indices
    return index_path_by_entity
class TestTransaction(BaseTestCase):
    """
    Check commit and rollback behaviour of an UnQLite database.

    We must use a file-based database to test the transaction functions.
    See http://unqlite.org/forum/trouble-with-transactions+1 for details.
    """

    def setUp(self):
        # Every test works on its own on-disk database; tearDown removes it.
        self._filename = "test.db"
        self.db = UnQLite(self._filename)

    def tearDown(self):
        # Closing is best-effort, but only ordinary errors are ignored:
        # KeyboardInterrupt and SystemExit must still propagate.
        try:
            self.db.close()
        except Exception:
            pass
        if os.path.exists(self._filename):
            os.unlink(self._filename)

    def test_transaction(self):
        # A function wrapped by commit_on_success that returns normally
        # must leave its write readable afterwards.
        @self.db.commit_on_success
        def _test_success(key, value):
            self.db[key] = value

        # A wrapped function that raises must propagate the exception and
        # its write must not be readable afterwards.
        @self.db.commit_on_success
        def _test_failure(key, value):
            self.db[key] = value
            raise Exception("intentional exception raised")

        _test_success("k1", "v1")
        self.assertEqual(self.db["k1"], "v1")

        self.assertRaises(Exception, _test_failure, "k2", "v2")
        self.assertRaises(KeyError, lambda: self.db["k2"])

    def test_explicit_transaction(self):
        # Reopen the database before starting the explicit transaction.
        self.db.close()
        self.db.open()

        # A write followed by rollback() must not be readable.
        self.db.begin()
        self.db["k1"] = "v1"
        self.db.rollback()

        self.assertRaises(KeyError, lambda: self.db["k1"])
if not all(t.isAlive() for t in threads): raise Exception("Threads are dead.") print "Finished." except KeyboardInterrupt, e: stop_evt.set() print "Stopped." stop_flag = True finally: db.commit() print "Progress Saved." return not stop_flag if __name__ == "__main__": import random db.begin() tokens = [ map(tk.__getitem__, ('consumer_key', 'consumer_secret', 'access_token', 'access_secret')) for tk in json.load(open('config/tokens.json')) ] for f in glob('llt/twitter-events-2012-2016/*.ids'): print f if not retrieve_tweets( f, 'llt/Data2/%s' % os.path.basename(f), THREAD_NUM, tokens, # proxies = [None], # proxies = ['127.0.0.1:49999'], proxies=['127.0.0.1:12305'],