def test_delete_all_documents(depositor: FirestoreDepositor):
    """Delete the documents streamed for the ``test-001`` / ``aged_data`` table.

    The first document streamed for sort key ``20201113222500000156`` must
    not carry a ``merged_level`` field; it is deleted, and then every
    document yielded by an unfiltered ``get_stream_by_sort_key()`` call is
    deleted one at a time.
    """
    depositor.set_current_topic_table('test-001', 'aged_data')

    keyed_stream = depositor.get_stream_by_sort_key(
        le_ge_key='20201113222500000156')
    for doc_ref in keyed_stream:
        header = depositor.get_header_from_ref(doc_ref)
        assert 'merged_level' not in header
        depositor.delete_documents([doc_ref])
        break  # only the first streamed document is inspected

    for doc_ref in depositor.get_stream_by_sort_key():
        depositor.delete_documents([doc_ref])
def main():
    """Handle a Pub/Sub push request that carries a load instruction.

    A GET request is answered with the index page. For any other request the
    JSON envelope is validated, a ``Loader`` is assembled from the module
    level clients, and the JSON-decoded ``load_config`` message attribute is
    passed to ``Loader.load``.

    Returns:
        A ``(body, status)`` tuple: 200 when ``load`` returns a truthy
        value, 400 otherwise, and 204 when the envelope is empty or is not
        a dict containing ``'message'``.
    """
    global firestore_db, pub_client, gcs_storer

    if request.method == 'GET':
        return render_template("index.html"), 200

    envelope = request.get_json()
    if not envelope:
        return "no Pub/Sub message received", 204
    if not (isinstance(envelope, dict) and 'message' in envelope):
        return "invalid Pub/Sub message format", 204

    attributes = envelope['message']['attributes']

    # Keyword arguments are evaluated left to right, so the collaborators
    # are built in the order publisher, depositor, archiver.
    loader = Loader(
        publishers={'pubsub': PubsubPublisher(pub_client=pub_client)},
        depositor=FirestoreDepositor(db=firestore_db),
        archiver=GCSListArchiver(storer=gcs_storer),
        storers=[gcs_storer],
    )

    loaded = loader.load(load_config=json.loads(attributes['load_config']))
    if loaded:
        return "load message received", 200
    return "load message to be resent", 400  # pragma: no cover
def test_add_normal_header_document(depositor: FirestoreDepositor):
    """Create a header with ``add_normal_header``, check it, then delete it.

    After the helper runs, the ``test-001`` / ``person_complex`` table must
    expose a header whose ``topic_id`` is ``test-001``, whose ``aged`` flag
    equals ``False`` and whose data holds 14 entries. Once the header
    document is deleted, ``get_table_header`` must return ``None``.
    """
    add_normal_header(depositor)
    depositor.set_current_topic_table('test-001', 'person_complex')

    table_header = depositor.get_table_header()
    assert table_header is not None

    header_fields = depositor.get_header_from_ref(table_header)
    header_rows = depositor.get_data_from_header(header_fields)
    assert header_fields['topic_id'] == 'test-001'
    # Equality (not identity) is kept on purpose to match the stored value
    # exactly as the original check did.
    assert header_fields['aged'] == False
    assert len(header_rows) == 14

    depositor.delete_documents([table_header])
    table_header = depositor.get_table_header()
    assert table_header is None
def packager_callback(s: PubsubSubscriber, message: dict, source, subscription_id):
    """Package the table named in a pulled message, then acknowledge it.

    Args:
        s: Subscriber used to unpack and acknowledge the message.
        message: Raw message as delivered by the subscriber.
        source: Unused by this callback.
        subscription_id: Subscription the message is acknowledged on.
    """
    global firestore_db, gcs_storer

    # The depositor is built before the archiver, as keyword arguments are
    # evaluated left to right.
    packager = Packager(
        depositor=FirestoreDepositor(db=firestore_db),
        archiver=GCSListArchiver(storer=gcs_storer),
    )

    raw_header, _, message_id = s.unpack_message(message)
    header = dict(raw_header)
    packager.package_data(header['topic_id'], header['table_id'])
    s.ack(project_id, subscription_id, message_id)
def receiver_callback(s: PubsubSubscriber, message: dict, source, subscription_id):
    """Dispatch the content of a pulled message, then acknowledge it.

    The message is unpacked into header and data, both are handed to
    ``Dispatcher.receive_data``, and the message is acknowledged afterwards.

    Args:
        s: Subscriber used to unpack and acknowledge the message.
        message: Raw message as delivered by the subscriber.
        source: Unused by this callback.
        subscription_id: Subscription the message is acknowledged on.
    """
    global project_id, firestore_db, gcs_storer

    # No archiver is built here: the Dispatcher does not take one, so the
    # previously constructed GCSListArchiver was never used.
    publishers = {'pubsub': PubsubPublisher(pub_client=pub_client)}
    depositor = FirestoreDepositor(db=firestore_db)
    receiver = Dispatcher(publishers=publishers,
                          depositor=depositor,
                          storers=[gcs_storer])

    header, data, message_id = s.unpack_message(message)
    receiver.receive_data(header, data)
    s.ack(project_id, subscription_id, message_id)
def loader_callback(s: PubsubSubscriber, message: dict, source, subscription_id):
    """Run a load described by a pulled message, then acknowledge it.

    The header is printed, its ``load_config`` entry is JSON-decoded and
    passed to ``Loader.load``, and the message is acknowledged afterwards.

    Args:
        s: Subscriber used to unpack and acknowledge the message.
        message: Raw message as delivered by the subscriber.
        source: Unused by this callback.
        subscription_id: Subscription the message is acknowledged on.
    """
    global project_id, firestore_db, gcs_storer

    # Keyword arguments are evaluated left to right, so the collaborators
    # are built in the order publisher, depositor, archiver.
    loader = Loader(
        publishers={'pubsub': PubsubPublisher(pub_client=pub_client)},
        depositor=FirestoreDepositor(db=firestore_db),
        archiver=GCSListArchiver(storer=gcs_storer),
        storers=[gcs_storer],
    )

    header, _, message_id = s.unpack_message(message)
    print(header)
    load_config = json.loads(header['load_config'])
    loader.load(load_config=load_config)
    s.ack(project_id, subscription_id, message_id)
def insight_receiver():
    """Handle a Pub/Sub push request that asks for a table to be packaged.

    A GET request is answered with the index page. For any other request the
    JSON envelope is validated and the ``topic_id`` / ``table_id`` message
    attributes are passed to ``Packager.package_data``.

    Returns:
        A ``(body, status)`` tuple: 200 when ``package_data`` returns a
        truthy value, 400 otherwise, and 204 when the envelope is empty or
        is not a dict containing ``'message'``.
    """
    global firestore_db, gcs_storer

    if request.method == 'GET':
        return render_template("index.html"), 200

    envelope = request.get_json()
    if not envelope:
        return "no Pub/Sub message received", 204
    if not (isinstance(envelope, dict) and 'message' in envelope):
        return "invalid Pub/Sub message format", 204

    attributes = envelope['message']['attributes']

    # The depositor is built before the archiver, as keyword arguments are
    # evaluated left to right.
    packager = Packager(
        depositor=FirestoreDepositor(db=firestore_db),
        archiver=GCSListArchiver(storer=gcs_storer),
    )

    packaged = packager.package_data(attributes['topic_id'],
                                     attributes['table_id'])
    if packaged:
        return "package message received", 200
    return "package message to be resent", 400  # pragma: no cover
def insight_receiver():
    """Handle a Pub/Sub push request that asks for documents to be merged.

    A GET request is answered with the index page. For any other request the
    JSON envelope is validated and the message attributes ``topic_id``,
    ``table_id``, ``merge_key``, ``merge_level`` and ``target_merge_level``
    are passed to ``Merger.merge_data``; the two level attributes are
    converted with ``int`` first.

    Returns:
        A ``(body, status)`` tuple: 200 when ``merge_data`` returns a truthy
        value, 400 otherwise, and 204 when the envelope is empty or is not
        a dict containing ``'message'``.
    """
    global firestore_db

    if request.method == 'GET':
        return render_template("index.html"), 200

    envelope = request.get_json()
    if not envelope:
        return "no Pub/Sub message received", 204
    if not (isinstance(envelope, dict) and 'message' in envelope):
        return "invalid Pub/Sub message format", 204

    attributes = envelope['message']['attributes']
    merger = Merger(depositor=FirestoreDepositor(db=firestore_db))

    # Attributes are read in the same order the merge call receives them.
    topic_id = attributes['topic_id']
    table_id = attributes['table_id']
    merge_key = attributes['merge_key']
    merge_level = int(attributes['merge_level'])
    target_merge_level = int(attributes['target_merge_level'])

    merged = merger.merge_data(topic_id, table_id, merge_key,
                               merge_level, target_merge_level)
    if merged:
        return "merge message received", 200  # pragma: no cover
    return "merge message to be resent", 400  # pragma: no cover
def main():
    """Handle a Pub/Sub push request that carries data to dispatch.

    A GET request is answered with the index page. For any other request the
    JSON envelope is validated, the message ``data`` field is base64-decoded,
    gunzipped and JSON-parsed, and the result is passed together with the
    message attributes to ``Dispatcher.receive_data``.

    The subscription list given to the dispatcher is read from the
    ``INSIGHT_SUB_LIST`` environment variable (base64 + gzip + JSON) and
    defaults to an empty dict when the variable is not set.

    Returns:
        A ``(body, status)`` tuple: 200 when ``receive_data`` returns a
        truthy value, 400 otherwise, and 204 when the envelope is empty or
        is not a dict containing ``'message'``.
    """
    global firestore_db, pub_client, gcs_storer

    if request.method == 'GET':
        return render_template("index.html"), 200

    envelope = request.get_json()
    if not envelope:
        return "no Pub/Sub message received", 204
    if not (isinstance(envelope, dict) and 'message' in envelope):
        return "invalid Pub/Sub message format", 204

    pubsub_message = envelope['message']
    attributes = pubsub_message['attributes']

    # Payload pipeline: base64 text -> gzip bytes -> JSON text -> object.
    compressed_body = base64.b64decode(pubsub_message['data'])
    body = json.loads(gzip.decompress(compressed_body).decode())

    publishers = {'pubsub': PubsubPublisher(pub_client=pub_client)}
    depositor = FirestoreDepositor(db=firestore_db)

    encoded_sub_list = os.environ.get('INSIGHT_SUB_LIST')
    if encoded_sub_list is None:
        sub_list = {}
    else:
        compressed_sub_list = base64.b64decode(encoded_sub_list)
        sub_list = json.loads(gzip.decompress(compressed_sub_list).decode())

    dispatcher = Dispatcher(publishers=publishers,
                            depositor=depositor,
                            storers=[gcs_storer],
                            subscription_list=sub_list)

    if dispatcher.receive_data(attributes, body):
        return "message received", 200
    return "message to be resent", 400  # pragma: no cover
def test_exceptions():
    """Constructing a depositor with a plain ``object`` as ``db`` raises ``TypeError``."""
    with pytest.raises(TypeError):
        FirestoreDepositor(db=object())
def test_diverse_items(depositor: FirestoreDepositor):
    """Exercise filter keys, document updates and filtered streaming.

    Steps, all on the ``test-001`` / ``aged_data`` table:

    1. ``_get_filter_key('packaged', 8)`` must return 8.
    2. The first document streamed for the pivot sort key must have exactly
       that sort key; it is updated to ``packaged`` with ``merged_level``
       set to ``depositor.DELETE`` and then read back.
    3. The first document streamed with ``equal=False`` must have a
       different sort key; its ``merged_level`` and ``filter_key`` are
       updated and it is read back.
    4. A stream restricted to ``merged`` documents with
       ``min_merge_level=4`` is opened and left at the first item, if any.
    """
    pivot_key = '20201113222500000156'
    depositor.set_current_topic_table('test-001', 'aged_data')
    assert depositor._get_filter_key('packaged', 8) == 8

    for doc_ref in depositor.get_stream_by_sort_key(le_ge_key=pivot_key):
        header = depositor.get_header_from_ref(doc_ref)
        assert header['sort_key'] == pivot_key
        packaged_patch = {'merge_status': 'packaged',
                          'merged_level': depositor.DELETE}
        depositor.update_document(doc_ref, packaged_patch)
        header = depositor.get_header_from_ref(doc_ref)
        break

    for doc_ref in depositor.get_stream_by_sort_key(le_ge_key=pivot_key,
                                                    equal=False):
        header = depositor.get_header_from_ref(doc_ref)
        assert header['sort_key'] != pivot_key
        level_patch = {'merged_level': 3, 'filter_key': 7}
        depositor.update_document(doc_ref, level_patch)
        header = depositor.get_header_from_ref(doc_ref)
        break

    for doc_ref in depositor.get_stream_by_sort_key(status_list=['merged'],
                                                    min_merge_level=4):
        break  # pragma: no cover
def test_merge_aged_simple(depositor: FirestoreDepositor):
    """Merge ``aged_data`` documents under two size limits and check totals.

    A sequence of ``merge_documents`` calls is made first with
    ``size_limit`` 5000 and then with ``2 ** 20``; each call must be truthy
    or falsy as listed. Afterwards every ``merged`` document must report a
    ``line_nb`` equal to its data length, the data lengths must sum to 266,
    and the summed ``data_size`` must equal the table header's
    ``merged_size``.
    """
    depositor.set_current_topic_table('test-001', 'aged_data')

    # (sort key, merge level, whether the merge is expected to be truthy)
    steps_with_small_limit = (
        ('20201113222500000267', 2, False),
        ('20201113222500000267', 1, True),
        ('20201113222500000156', 1, True),
        ('20201113222500000267', 2, False),
    )
    steps_with_large_limit = (
        ('20201113222500000221', 1, True),
        ('20201113222500000267', 2, True),
        ('20201113222500000267', 1, True),
    )

    depositor.size_limit = 5000
    for sort_key, level, expected in steps_with_small_limit:
        assert bool(depositor.merge_documents(sort_key, level)) is expected

    depositor.size_limit = 2 ** 20
    for sort_key, level, expected in steps_with_large_limit:
        assert bool(depositor.merge_documents(sort_key, level)) is expected

    line_total = 0
    size_total = 0
    for doc_ref in depositor.get_stream_by_sort_key(status_list=['merged']):
        header = depositor.get_header_from_ref(doc_ref)
        rows = depositor.get_data_from_header(header)
        line_total += len(rows)
        size_total += header['data_size']
        assert header['line_nb'] == len(rows)

    table_header = depositor.get_header_from_ref(depositor.get_table_header())
    assert line_total == 266
    assert size_total == table_header['merged_size']
def depositor():
    """Yield a ``FirestoreDepositor`` backed by a default ``firestore.Client``."""
    yield FirestoreDepositor(db=firestore.Client())