def test_handle_data_update_event(
    self,
    producer,
    test_table,
    test_topic,
    first_test_kafka_offset,
    second_test_kafka_offset,
    data_event_handler,
    data_update_events,
    schema_wrapper_entry,
    patches,
    patch_get_payload_schema,
    patch_message_topic,
    position
):
    expected_call_args = []
    for data_event in data_update_events:
        position = LogPosition(log_file='binlog', log_pos=100)
        upstream_position_info = {
            "position": position.to_dict(),
            "cluster_name": "yelp_main",
            "database_name": "fake_database",
            "table_name": "fake_table"
        }
        data_event_handler.handle_event(data_event, position)
        expected_call_args.append(UpdateMessage(
            payload_data=data_event.row['after_values'],
            schema_id=schema_wrapper_entry.schema_id,
            upstream_position_info=upstream_position_info,
            previous_payload_data=data_event.row["before_values"],
            keys=(u'primary_key', ),
            timestamp=data_event.timestamp
        ))
    actual_call_args = [i[0][0] for i in producer.publish.call_args_list]
    self._assert_messages_as_expected(expected_call_args, actual_call_args)
def test_log_pos_replication_dict(self):
    p = LogPosition(log_pos=100, log_file="binlog", offset=10)
    assert p.to_replication_dict() == {"log_pos": 100, "log_file": "binlog"}
    assert p.offset == 10
def test_transaction_id(self, fake_transaction_id_schema_id, mock_source_cluster_name):
    p = LogPosition(log_pos=100, log_file="binlog")
    actual_transaction_id = p.get_transaction_id(
        fake_transaction_id_schema_id,
        unicode(mock_source_cluster_name)
    )
    expected_transaction_id = get_ltid_meta_attribute(
        fake_transaction_id_schema_id,
        unicode(mock_source_cluster_name),
        u"binlog",
        100
    )
    assert actual_transaction_id.schema_id == expected_transaction_id.schema_id
    assert actual_transaction_id.payload_data == expected_transaction_id.payload_data
def test_log_pos_dict(self):
    p = LogPosition(
        log_pos=100,
        log_file="binlog",
        offset=10,
        hb_serial=123,
        hb_timestamp=1447354877
    )
    expected_dict = {
        "log_pos": 100,
        "log_file": "binlog",
        "offset": 10,
        "hb_serial": 123,
        "hb_timestamp": 1447354877,
    }
    assert p.to_dict() == expected_dict
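# For reference, a minimal sketch of the LogPosition interface that the tests
# above exercise, inferred only from their assertions. The class name is
# hypothetical; the real replication_handler implementation may differ (it
# also exposes get_transaction_id, which is not sketched here).
class LogPositionSketch(object):

    def __init__(self, log_pos=None, log_file=None, offset=None,
                 hb_serial=None, hb_timestamp=None):
        self.log_pos = log_pos
        self.log_file = log_file
        self.offset = offset
        self.hb_serial = hb_serial
        self.hb_timestamp = hb_timestamp

    def to_dict(self):
        # Full checkpoint payload, as asserted in test_log_pos_dict.
        return {
            "log_pos": self.log_pos,
            "log_file": self.log_file,
            "offset": self.offset,
            "hb_serial": self.hb_serial,
            "hb_timestamp": self.hb_timestamp,
        }

    def to_replication_dict(self):
        # Only the fields needed to resume tailing the binlog stream,
        # as asserted in test_log_pos_replication_dict.
        return {"log_pos": self.log_pos, "log_file": self.log_file}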
def rh_unsupported_query_event(self):
    unsupported_query_event = mock.Mock(spec=QueryEvent)
    unsupported_query_event.query = 'BEGIN'
    return ReplicationHandlerEvent(
        unsupported_query_event,
        LogPosition(log_file='binlog.001', log_pos=10)
    )
def _update_upstream_position(self, event):
    """If gtid_enabled and the next event is a GtidEvent, update
    self._upstream_position with a GtidPosition; if the next event is not a
    GtidEvent, keep the current self._upstream_position. If gtid is not
    enabled, update self._upstream_position with a LogPosition built from the
    heartbeat row.

    TODO(cheng|DATAPIPE-172): We may need to skip duplicate heartbeats.
    """
    if self.gtid_enabled and isinstance(event, GtidEvent):
        self._upstream_position = GtidPosition(gtid=event.gtid)
    elif (not self.gtid_enabled) and event.schema == HEARTBEAT_DB and hasattr(event, 'row'):
        # row['after_values']['timestamp'] should be a datetime object without
        # tzinfo; we need to give it a local timezone.
        timestamp = self._add_tz_info_to_tz_naive_timestamp(
            event.row["after_values"]["timestamp"]
        )
        if self.sensu_alert_manager and self.meteorite_gauge_manager:
            self.sensu_alert_manager.periodic_process(timestamp)
            self.meteorite_gauge_manager.periodic_process(timestamp)
        self._log_process(timestamp, event.log_file, event.log_pos)
        self._upstream_position = LogPosition(
            log_pos=event.log_pos,
            log_file=event.log_file,
            hb_serial=event.row["after_values"]["serial"],
            hb_timestamp=calendar.timegm(timestamp.utctimetuple()),
        )
    self._offset = 0
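# A short illustration of the hb_timestamp conversion above: the heartbeat row
# carries a tz-naive local datetime, which is made tz-aware and then converted
# to UTC epoch seconds via utctimetuple() + calendar.timegm(). The timezone
# below is only an example, and pytz is an assumption here; the real code uses
# _add_tz_info_to_tz_naive_timestamp to attach the local timezone.
import calendar
from datetime import datetime

import pytz

naive = datetime(2015, 10, 21, 5, 5, 27)               # local wall-clock time
aware = pytz.timezone('US/Pacific').localize(naive)    # attach local tz (PDT, UTC-7)
epoch_seconds = calendar.timegm(aware.utctimetuple())  # 1445429127 == 2015-10-21 12:05:27 UTC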
def rh_supported_query_event(self):
    supported_query_event = mock.Mock(spec=QueryEvent)
    supported_query_event.query = 'alter table biz add column name int(11)'
    return ReplicationHandlerEvent(
        supported_query_event,
        LogPosition(log_file='binlog.001', log_pos=50)
    )
def test_dry_run_handler_event(
    self,
    producer,
    dry_run_data_event_handler,
    data_create_events,
    patches,
    patch_message_topic
):
    patches.patch_dry_run_config.return_value = True
    for data_event in data_create_events:
        position = LogPosition(log_file='binlog', log_pos=100)
        dry_run_data_event_handler.handle_event(data_event, position)
    assert producer.publish.call_count == 4
def get_latest_source_log_position(self):
    with self.db_connections.get_source_cursor() as cursor:
        cursor.execute("show master status")
        result = cursor.fetchone()
    # result is a tuple with the log file name at pos 0 and the position at pos 1.
    log.info("The latest master log position is {log_file}: {log_pos}".format(
        log_file=result[0],
        log_pos=result[1],
    ))
    return LogPosition(log_file=result[0], log_pos=result[1])
def _build_position(self):
    """We need to instantiate a new position for each event."""
    if self.gtid_enabled:
        return GtidPosition(gtid=self._upstream_position.gtid, offset=self._offset)
    else:
        return LogPosition(
            log_pos=self._upstream_position.log_pos,
            log_file=self._upstream_position.log_file,
            offset=self._offset,
            hb_serial=self._upstream_position.hb_serial,
            hb_timestamp=self._upstream_position.hb_timestamp,
        )
def test_get_data_events_refresh(self, mock_db_connections, patch_stream):
    data_event = self._prepare_data_event('fake_table_data_pipeline_refresh')
    patch_stream.return_value.fetchone.side_effect = [data_event]
    assert len(data_event.rows) == 3
    stream = LowLevelBinlogStreamReaderWrapper(
        mock_db_connections.source_database_config,
        mock_db_connections.tracker_database_config,
        LogPosition(
            log_pos=100,
            log_file="binlog.001",
        )
    )
    assert stream.pop().table == 'fake_table'
    assert stream.pop().message_type == RefreshMessage
def test_none_events(self, mock_db_connections, patch_stream):
    query_event = mock.Mock(spec=QueryEvent)
    patch_stream.return_value.fetchone.side_effect = [
        None,
        query_event,
    ]
    stream = LowLevelBinlogStreamReaderWrapper(
        mock_db_connections.source_database_config,
        mock_db_connections.tracker_database_config,
        LogPosition(
            log_pos=100,
            log_file="binlog.001",
        )
    )
    assert stream.peek() == query_event
    assert stream.pop() == query_event
def test_get_only_tables(self, mock_db_connections, patch_config_whitelist):
    patch_config_whitelist.return_value = [
        'tab1',
        'tab2',
        'tab1_data_pipeline_refresh'
    ]
    expected_only_tables = [
        'tab1',
        'tab1_data_pipeline_refresh',
        'tab2',
        'tab2_data_pipeline_refresh'
    ]
    stream = LowLevelBinlogStreamReaderWrapper(
        mock_db_connections.source_database_config,
        mock_db_connections.tracker_database_config,
        LogPosition(
            log_pos=100,
            log_file="binlog.001",
        )
    )
    assert expected_only_tables == stream._get_only_tables()
def test_flattern_data_events(self, mock_db_connections, patch_stream):
    data_event = self._prepare_data_event('fake_table')
    gtid_event = mock.Mock(spec=GtidEvent)
    query_event = mock.Mock(spec=QueryEvent)
    patch_stream.return_value.fetchone.side_effect = [
        gtid_event,
        query_event,
        data_event,
    ]
    assert len(data_event.rows) == 3
    stream = LowLevelBinlogStreamReaderWrapper(
        mock_db_connections.source_database_config,
        mock_db_connections.tracker_database_config,
        LogPosition(
            log_pos=100,
            log_file="binlog.001",
        )
    )
    assert stream.peek() == gtid_event
    assert stream.pop() == gtid_event
    assert stream.pop() == query_event
    assert stream.pop().row == data_event.rows[0]
    assert stream.pop().row == data_event.rows[1]
    assert stream.pop().row == data_event.rows[2]
def _setup_stream_and_expected_result(
    self,
    source_database_config,
    tracker_database_config,
    patch_stream
):
    log_pos = 10
    log_file = "binlog.001"
    row = {"after_values": {
        "serial": 123,
        # This timestamp is Wed, 21 Oct 2015 12:05:27 GMT
        "timestamp": datetime.fromtimestamp(1445429127)
    }}
    heartbeat_event = mock.Mock(
        spec=DataEvent,
        schema='yelp_heartbeat',
        log_pos=log_pos,
        log_file=log_file,
        row=row
    )
    data_event_0 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
    data_event_1 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
    data_event_2 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
    event_list = [
        heartbeat_event,
        data_event_0,
        data_event_1,
        data_event_2,
    ]
    patch_stream.return_value.peek.side_effect = event_list
    patch_stream.return_value.pop.side_effect = event_list
    stream = SimpleBinlogStreamReaderWrapper(
        source_database_config,
        tracker_database_config,
        LogPosition(
            log_pos=log_pos,
            log_file=log_file,
            offset=0
        ),
        gtid_enabled=False,
    )
    # Since the starting offset is 0, the results should begin at offset 1,
    # skipping data_event_0, which sits at offset 0.
    results = [
        ReplicationHandlerEvent(
            event=data_event_1,
            position=LogPosition(
                log_pos=log_pos,
                log_file=log_file,
                offset=1,
                hb_serial=123,
                # This is Wed, 21 Oct 2015 12:05:27 GMT
                hb_timestamp=1445429127,
            )
        ),
        ReplicationHandlerEvent(
            event=data_event_2,
            position=LogPosition(
                log_pos=log_pos,
                log_file=log_file,
                offset=2,
                hb_serial=123,
                # This is Wed, 21 Oct 2015 12:05:27 GMT
                hb_timestamp=1445429127,
            )
        )
    ]
    return stream, results
def position_before_master(self):
    return LogPosition(log_file='binlog.001', log_pos=120)
def position_after_master(self):
    return LogPosition(log_file='binlog.001', log_pos=300)
def position(self, gtid_enabled):
    if gtid_enabled:
        return GtidPosition(gtid="sid:10")
    else:
        return LogPosition(log_file='binlog', log_pos=100)
def get_position_to_resume_tailing_from(self):
    if self.global_event_state:
        return construct_position(self.global_event_state.position)
    return GtidPosition() if self.gtid_enabled else LogPosition()
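# A minimal sketch of how construct_position might rebuild a position object
# from the persisted GlobalEventState dict. This is an assumption based on the
# to_dict() payloads exercised above, not necessarily the real
# replication_handler helper; the function name below is hypothetical.
def construct_position_sketch(position_dict):
    # GTID checkpoints carry a "gtid" key; log-position checkpoints carry
    # "log_pos"/"log_file" plus optional offset and heartbeat fields.
    if "gtid" in position_dict:
        return GtidPosition(
            gtid=position_dict["gtid"],
            offset=position_dict.get("offset"),
        )
    return LogPosition(
        log_pos=position_dict["log_pos"],
        log_file=position_dict["log_file"],
        offset=position_dict.get("offset"),
        hb_serial=position_dict.get("hb_serial"),
        hb_timestamp=position_dict.get("hb_timestamp"),
    )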