def test_handle_data_update_event(
         self, producer, test_table, test_topic, first_test_kafka_offset,
         second_test_kafka_offset, data_event_handler, data_update_events,
         schema_wrapper_entry, patches, patch_get_payload_schema,
         patch_message_topic, position):
     expected_call_args = []
     for data_event in data_update_events:
         position = LogPosition(log_file='binlog', log_pos=100)
         upstream_position_info = {
             "position": position.to_dict(),
             "cluster_name": "yelp_main",
             "database_name": "fake_database",
             "table_name": "fake_table"
         }
         data_event_handler.handle_event(data_event, position)
         expected_call_args.append(
             UpdateMessage(
                 payload_data=data_event.row['after_values'],
                 schema_id=schema_wrapper_entry.schema_id,
                 upstream_position_info=upstream_position_info,
                 previous_payload_data=data_event.row["before_values"],
                 keys=(u'primary_key', ),
                 timestamp=data_event.timestamp))
     actual_call_args = [i[0][0] for i in producer.publish.call_args_list]
     self._assert_messages_as_expected(expected_call_args, actual_call_args)
Example #2
 def test_log_pos_replication_dict(self):
     p = LogPosition(log_pos=100, log_file="binlog", offset=10)
     assert p.to_replication_dict() == {
         "log_pos": 100,
         "log_file": "binlog"
     }
     assert p.offset == 10
Example #4
 def test_transaction_id(self, fake_transaction_id_schema_id,
                         mock_source_cluster_name):
     p = LogPosition(log_pos=100, log_file="binlog")
     actual_transaction_id = p.get_transaction_id(
         fake_transaction_id_schema_id, unicode(mock_source_cluster_name))
     expected_transaction_id = get_ltid_meta_attribute(
         fake_transaction_id_schema_id, unicode(mock_source_cluster_name),
         u"binlog", 100)
     assert actual_transaction_id.schema_id == expected_transaction_id.schema_id
     assert actual_transaction_id.payload_data == expected_transaction_id.payload_data
Example #5
 def test_log_pos_dict(self):
     p = LogPosition(log_pos=100,
                     log_file="binlog",
                     offset=10,
                     hb_serial=123,
                     hb_timestamp=1447354877)
     expected_dict = {
         "log_pos": 100,
         "log_file": "binlog",
         "offset": 10,
         "hb_serial": 123,
         "hb_timestamp": 1447354877,
     }
     assert p.to_dict() == expected_dict
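
Taken together, the two dict tests above (Example #2 and Example #5) pin down LogPosition's constructor arguments and its two serialization methods. The following is only a minimal sketch that would satisfy those assertions; the field handling is an assumption for illustration, not the replication_handler project's actual implementation.

class LogPosition(object):
    # Illustrative sketch only: the real implementation details may differ.
    def __init__(self, log_pos=None, log_file=None, offset=None,
                 hb_serial=None, hb_timestamp=None):
        self.log_pos = log_pos
        self.log_file = log_file
        self.offset = offset
        self.hb_serial = hb_serial
        self.hb_timestamp = hb_timestamp

    def to_dict(self):
        # Full checkpoint state, as asserted in test_log_pos_dict.
        return {
            "log_pos": self.log_pos,
            "log_file": self.log_file,
            "offset": self.offset,
            "hb_serial": self.hb_serial,
            "hb_timestamp": self.hb_timestamp,
        }

    def to_replication_dict(self):
        # Only the coordinates needed to resume the replication stream,
        # as asserted in test_log_pos_replication_dict.
        return {"log_pos": self.log_pos, "log_file": self.log_file}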
Example #7
 def rh_unsupported_query_event(self):
     unsupported_query_event = mock.Mock(spec=QueryEvent)
     unsupported_query_event.query = 'BEGIN'
     return ReplicationHandlerEvent(
         unsupported_query_event,
         LogPosition(log_file='binlog.001', log_pos=10)
     )
Example #8
 def _update_upstream_position(self, event):
     """If gtid_enabled and the next event is GtidEvent,
     we update the self._upstream_position with GtidPosition, if next event is
     not GtidEvent, we keep the current self._upstream_position, if not gtid_enabled,
     we update the self.upstream_position with LogPosition.
     TODO(cheng|DATAPIPE-172): We may need to skip duplicate heartbeats.
     """
     if self.gtid_enabled and isinstance(event, GtidEvent):
         self._upstream_position = GtidPosition(gtid=event.gtid)
     elif (not self.gtid_enabled
           ) and event.schema == HEARTBEAT_DB and hasattr(event, 'row'):
         # row['after_values']['timestamp'] should be a datetime object without tzinfo.
         # we need to give it a local timezone.
         timestamp = self._add_tz_info_to_tz_naive_timestamp(
             event.row["after_values"]["timestamp"])
         if self.sensu_alert_manager and self.meteorite_gauge_manager:
             self.sensu_alert_manager.periodic_process(timestamp)
             self.meteorite_gauge_manager.periodic_process(timestamp)
         self._log_process(timestamp, event.log_file, event.log_pos)
         self._upstream_position = LogPosition(
             log_pos=event.log_pos,
             log_file=event.log_file,
             hb_serial=event.row["after_values"]["serial"],
             hb_timestamp=calendar.timegm(timestamp.utctimetuple()),
         )
     self._offset = 0
Example #9
 def rh_supported_query_event(self):
     supported_query_event = mock.Mock(spec=QueryEvent)
     supported_query_event.query = 'alter table biz add column name int(11)'
     return ReplicationHandlerEvent(
         supported_query_event,
         LogPosition(log_file='binlog.001', log_pos=50)
     )
 def test_dry_run_handler_event(self, producer, dry_run_data_event_handler,
                                data_create_events, patches,
                                patch_message_topic):
     patches.patch_dry_run_config.return_value = True
     for data_event in data_create_events:
         position = LogPosition(log_file='binlog', log_pos=100)
         dry_run_data_event_handler.handle_event(data_event, position)
     assert producer.publish.call_count == 4
Example #12
 def get_latest_source_log_position(self):
     with self.db_connections.get_source_cursor() as cursor:
         cursor.execute("show master status")
         result = cursor.fetchone()
     # result is a tuple with file name at pos 0, and position at pos 1.
     log.info("The latest master log position is {log_file}: {log_pos}".format(
         log_file=result[0],
         log_pos=result[1],
     ))
     return LogPosition(log_file=result[0], log_pos=result[1])
Example #13
 def _build_position(self):
     """ We need to instantiate a new position for each event."""
     if self.gtid_enabled:
         return GtidPosition(gtid=self._upstream_position.gtid,
                             offset=self._offset)
     else:
         return LogPosition(
             log_pos=self._upstream_position.log_pos,
             log_file=self._upstream_position.log_file,
             offset=self._offset,
             hb_serial=self._upstream_position.hb_serial,
             hb_timestamp=self._upstream_position.hb_timestamp,
         )
Example #14
 def test_get_data_events_refresh(self, mock_db_connections, patch_stream):
     data_event = self._prepare_data_event(
         'fake_table_data_pipeline_refresh')
     patch_stream.return_value.fetchone.side_effect = [data_event]
     assert len(data_event.rows) == 3
     stream = LowLevelBinlogStreamReaderWrapper(
         mock_db_connections.source_database_config,
         mock_db_connections.tracker_database_config,
         LogPosition(
             log_pos=100,
             log_file="binlog.001",
         ))
     assert stream.pop().table == 'fake_table'
     assert stream.pop().message_type == RefreshMessage
Example #15
 def test_none_events(self, mock_db_connections, patch_stream):
     query_event = mock.Mock(spec=QueryEvent)
     patch_stream.return_value.fetchone.side_effect = [
         None,
         query_event,
     ]
     stream = LowLevelBinlogStreamReaderWrapper(
         mock_db_connections.source_database_config,
         mock_db_connections.tracker_database_config,
         LogPosition(
             log_pos=100,
             log_file="binlog.001",
         ))
     assert stream.peek() == query_event
     assert stream.pop() == query_event
Example #16
    def test_get_only_tables(self, mock_db_connections,
                             patch_config_whitelist):
        patch_config_whitelist.return_value = [
            'tab1', 'tab2', 'tab1_data_pipeline_refresh'
        ]
        expected_only_tables = [
            'tab1', 'tab1_data_pipeline_refresh', 'tab2',
            'tab2_data_pipeline_refresh'
        ]
        stream = LowLevelBinlogStreamReaderWrapper(
            mock_db_connections.source_database_config,
            mock_db_connections.tracker_database_config,
            LogPosition(
                log_pos=100,
                log_file="binlog.001",
            ))

        assert expected_only_tables == stream._get_only_tables()
Example #17
 def test_flattern_data_events(self, mock_db_connections, patch_stream):
     data_event = self._prepare_data_event('fake_table')
     gtid_event = mock.Mock(spec=GtidEvent)
     query_event = mock.Mock(spec=QueryEvent)
     patch_stream.return_value.fetchone.side_effect = [
         gtid_event,
         query_event,
         data_event,
     ]
     assert len(data_event.rows) == 3
     stream = LowLevelBinlogStreamReaderWrapper(
         mock_db_connections.source_database_config,
         mock_db_connections.tracker_database_config,
         LogPosition(
             log_pos=100,
             log_file="binlog.001",
         ))
     assert stream.peek() == gtid_event
     assert stream.pop() == gtid_event
     assert stream.pop() == query_event
     assert stream.pop().row == data_event.rows[0]
     assert stream.pop().row == data_event.rows[1]
     assert stream.pop().row == data_event.rows[2]
 def _setup_stream_and_expected_result(
     self,
     source_database_config,
     tracker_database_config,
     patch_stream
 ):
     log_pos = 10
     log_file = "binlog.001"
     row = {"after_values": {
         "serial": 123,
         # This timestamp is Wed, 21 Oct 2015 12:05:27 GMT
         "timestamp": datetime.fromtimestamp(1445429127)
     }}
     heartbeat_event = mock.Mock(
         spec=DataEvent,
         schema='yelp_heartbeat',
         log_pos=log_pos,
         log_file=log_file,
         row=row
     )
     data_event_0 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
     data_event_1 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
     data_event_2 = mock.Mock(spec=DataEvent, table="business", schema="yelp")
     event_list = [
         heartbeat_event,
         data_event_0,
         data_event_1,
         data_event_2,
     ]
     patch_stream.return_value.peek.side_effect = event_list
     patch_stream.return_value.pop.side_effect = event_list
     stream = SimpleBinlogStreamReaderWrapper(
         source_database_config,
         tracker_database_config,
         LogPosition(
             log_pos=log_pos,
             log_file=log_file,
             offset=0
         ),
         gtid_enabled=False,
     )
     # Since the offset is 0, the results should start at offset 1 and skip
     # data_event_0, which is at offset 0.
     results = [
         ReplicationHandlerEvent(
             event=data_event_1,
             position=LogPosition(
                 log_pos=log_pos,
                 log_file=log_file,
                 offset=1,
                 hb_serial=123,
                 # This is Wed, 21 Oct 2015 12:05:27 GMT
                 hb_timestamp=1445429127,
             )
         ),
         ReplicationHandlerEvent(
             event=data_event_2,
             position=LogPosition(
                 log_pos=log_pos,
                 log_file=log_file,
                 offset=2,
                 hb_serial=123,
                 # This is Wed, 21 Oct 2015 12:05:27 GMT
                 hb_timestamp=1445429127,
             )
         )
     ]
     return stream, results
Example #19
 def position_before_master(self):
     return LogPosition(log_file='binlog.001', log_pos=120)
Example #20
 def position_after_master(self):
     return LogPosition(log_file='binlog.001', log_pos=300)
 def position(self, gtid_enabled):
     if gtid_enabled:
         return GtidPosition(gtid="sid:10")
     else:
         return LogPosition(log_file='binlog', log_pos=100)
Example #22
 def get_position_to_resume_tailing_from(self):
     if self.global_event_state:
         return construct_position(self.global_event_state.position)
     return GtidPosition() if self.gtid_enabled else LogPosition()
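
get_position_to_resume_tailing_from rebuilds a Position from whatever dict was checkpointed in global_event_state. As a rough sketch of what that construct_position call might do (an assumption for illustration; the dispatch key and the helper name construct_position_sketch are hypothetical, not the project's API):

def construct_position_sketch(position_dict):
    # If the saved dict carries a gtid, resume from a GtidPosition;
    # otherwise treat the saved keys as LogPosition constructor arguments,
    # matching the kwargs used throughout the examples above.
    if "gtid" in position_dict:
        return GtidPosition(gtid=position_dict["gtid"])
    return LogPosition(**position_dict)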