def test_regular_parameters(self, parameter, mock_container, producer):
    """ Check that an ordinary publish parameter given to the ``Publisher``
    constructor is passed to the producer by default, and that a value
    supplied at publish time overrides it.
    """
    mock_container.config = {'AMQP_URI': 'memory://localhost'}
    mock_container.service_name = "service"

    ctx = Mock()
    ctx.context_data = {}

    default_value = Mock()
    override_value = Mock()

    unbound = Publisher(**{parameter: default_value})
    publisher = unbound.bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(ctx)

    # no override: the value given at instantiation reaches the producer
    publish("payload")
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs[parameter] == default_value

    # explicit override at publish time wins
    publish("payload", **{parameter: override_value})
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs[parameter] == override_value
def test_declare(self, mock_container, producer):
    """ The ``declare`` list sent to the producer is the instantiation-time
    list, followed by the publisher's exchange, followed by any entries
    passed at publish time.
    """
    mock_container.config = {'AMQP_URI': 'memory://localhost'}
    mock_container.service_name = "service"

    ctx = Mock()
    ctx.context_data = {}

    exchange = Mock()
    at_instantiation = [Mock()]
    at_publish = [Mock()]

    publisher = Publisher(
        exchange=exchange, declare=at_instantiation
    ).bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(ctx)

    publish("payload")
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs['declare'] == at_instantiation + [exchange]

    publish("payload", declare=at_publish)
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs['declare'] == (
        at_instantiation + [exchange] + at_publish
    )
def test_publish_custom_headers(
    mock_container, maybe_declare, mock_producer, mock_connection,
    rabbit_config
):
    """ Every key of the worker context data, including a custom one, is
    expected in the published headers under a ``nameko.`` prefix.
    """
    container = mock_container
    container.config = rabbit_config
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container,
        service,
        DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # setup is expected to declare the queue on the mocked connection
    publisher.setup()
    maybe_declare.assert_called_once_with(foobar_queue, mock_connection)

    # publish and inspect what reached the producer
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    expected_kwargs = dict(
        headers={
            'nameko.language': 'en',
            'nameko.customheader': 'customvalue',
            'nameko.call_id_stack': ['srcservice.method.0'],
        },
        exchange=foobar_ex,
        retry=True,
        serializer=container.serializer,
        mandatory=False,
        retry_policy=DEFAULT_RETRY_POLICY,
        publish_kwarg="value",
    )
    mock_producer.publish.assert_called_once_with("msg", **expected_kwargs)
def test_declare(self, mock_container, producer):
    """ The ``declare`` list sent to the producer is the instantiation-time
    list, followed by the publisher's exchange and queue, followed by any
    entries passed at publish time.
    """
    mock_container.config = {'AMQP_URI': 'memory://localhost'}
    mock_container.service_name = "service"

    ctx = Mock()
    ctx.context_data = {}

    exchange = Mock()
    queue = Mock()
    at_instantiation = [Mock()]
    at_publish = [Mock()]

    publisher = Publisher(
        exchange=exchange, queue=queue, declare=at_instantiation
    ).bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(ctx)

    publish("payload")
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs['declare'] == at_instantiation + [exchange, queue]

    publish("payload", declare=at_publish)
    _, sent_kwargs = producer.publish.call_args
    assert sent_kwargs['declare'] == (
        at_instantiation + [exchange, queue] + at_publish
    )
def test_use_confirms(self, mock_container, get_producer):
    """ ``use_confirms`` given at instantiation is the default handed to
    ``get_producer``; a value given at publish time replaces it.
    """
    mock_container.config = {'AMQP_URI': 'memory://localhost'}
    mock_container.service_name = "service"

    ctx = Mock()
    ctx.context_data = {}

    publisher = Publisher(use_confirms=False).bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(ctx)

    # get_producer is expected to receive exactly three positional
    # arguments, the second of which is the confirms flag
    publish("payload")
    positional, _ = get_producer.call_args
    _, confirms_flag, _ = positional
    assert confirms_flag is False

    publish("payload", use_confirms=True)
    positional, _ = get_producer.call_args
    _, confirms_flag, _ = positional
    assert confirms_flag is True
def test_publish_to_exchange(maybe_declare, patch_publisher, mock_container):
    """ A publisher bound to an exchange declares it during setup and
    forwards the message, headers and publish kwargs to the producer.
    """
    container = mock_container
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(container, service, DummyProvider("publish"))

    publisher = Publisher(exchange=foobar_ex).bind(container, "publish")

    # replace the connection / producer factories with context-managed mocks
    connection = Mock()
    producer = Mock()
    get_connection, get_producer = patch_publisher(publisher)
    get_connection.return_value = as_context_manager(connection)
    get_producer.return_value = as_context_manager(producer)

    # declarations
    publisher.setup()
    maybe_declare.assert_called_once_with(foobar_ex, connection)

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    expected_kwargs = dict(
        headers={'nameko.call_id_stack': ['srcservice.publish.0']},
        exchange=foobar_ex,
        retry=True,
        serializer=container.serializer,
        retry_policy=DEFAULT_RETRY_POLICY,
        publish_kwarg="value",
    )
    producer.publish.assert_called_once_with("msg", **expected_kwargs)
def test_use_confirms(self, mock_container, get_producer):
    """ ``use_confirms`` given at instantiation is the default seen by
    ``get_producer`` as ``confirm_publish``; a value given at publish time
    replaces it.
    """
    mock_container.config = {'AMQP_URI': 'memory://localhost'}
    mock_container.service_name = "service"

    ctx = Mock()
    ctx.context_data = {}

    publisher = Publisher(use_confirms=False).bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(ctx)

    # the fourth positional argument to get_producer is read as a mapping
    # carrying the `confirm_publish` flag
    publish("payload")
    positional = get_producer.call_args[0]
    assert positional[3].get('confirm_publish') is False

    publish("payload", use_confirms=True)
    positional = get_producer.call_args[0]
    assert positional[3].get('confirm_publish') is True
def test_publish_to_queue(empty_config, maybe_declare, patch_publisher):
    """ A publisher bound to a queue declares it during setup and publishes
    to ``foobar_ex`` with the context data as prefixed headers.
    """
    container = Mock(spec=ServiceContainer)
    container.shared_extensions = {}
    container.service_name = "srcservice"
    container.config = empty_config

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider("publish"), data={"language": "en"}
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # replace the connection / producer factories with context-managed mocks
    connection = Mock()
    producer = Mock()
    get_connection, get_producer = patch_publisher(publisher)
    get_connection.return_value = as_context_manager(connection)
    get_producer.return_value = as_context_manager(producer)

    # declarations
    publisher.setup()
    maybe_declare.assert_called_once_with(foobar_queue, connection)

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    expected_kwargs = dict(
        headers={
            "nameko.language": "en",
            "nameko.call_id_stack": ["srcservice.publish.0"],
        },
        exchange=foobar_ex,
        retry=True,
        retry_policy=DEFAULT_RETRY_POLICY,
        publish_kwarg="value",
    )
    producer.publish.assert_called_once_with("msg", **expected_kwargs)
def test_publish_to_queue(patch_maybe_declare, mock_producer,
                          mock_connection, mock_container):
    """ A publisher bound to a queue declares it on the connection during
    setup, then publishes with the publisher-class defaults plus the
    caller's extra keyword arguments.
    """
    container = mock_container
    container.config = {'AMQP_URI': 'memory://'}
    container.shared_extensions = {}
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider("publish"),
        data={'language': 'en'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # declarations
    publisher.setup()
    assert patch_maybe_declare.call_args_list == [
        call(foobar_queue, mock_connection)
    ]

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    # options expected to be taken verbatim from the publisher class
    class_defaults = (
        'retry', 'retry_policy', 'compression', 'mandatory',
        'expiration', 'delivery_mode', 'priority',
    )
    expected_kwargs = {
        option: getattr(publisher.publisher_cls, option)
        for option in class_defaults
    }
    expected_kwargs.update({
        'publish_kwarg': "value",
        'exchange': foobar_ex,
        'headers': {
            'nameko.language': 'en',
            'nameko.call_id_stack': ['srcservice.publish.0'],
        },
        'declare': publisher.declare,
        'serializer': publisher.serializer,
    })

    assert mock_producer.publish.call_args_list == [
        call('msg', **expected_kwargs)
    ]
def test_publish_to_queue(
    patch_maybe_declare, mock_producer, mock_channel, mock_container
):
    """ A publisher bound to a queue declares it on the channel during
    setup, then publishes with the publisher-class defaults plus the
    caller's extra keyword arguments.
    """
    container = mock_container
    container.config = {'AMQP_URI': 'memory://'}
    container.shared_extensions = {}
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider("publish"),
        data={'language': 'en'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # declarations
    publisher.setup()
    assert patch_maybe_declare.call_args_list == [
        call(foobar_queue, mock_channel)
    ]

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    # options expected to be taken verbatim from the publisher class
    class_defaults = (
        'retry', 'retry_policy', 'compression', 'mandatory',
        'expiration', 'delivery_mode', 'priority',
    )
    expected_kwargs = {
        option: getattr(publisher.publisher_cls, option)
        for option in class_defaults
    }
    expected_kwargs.update({
        'publish_kwarg': "value",
        'exchange': foobar_ex,
        'headers': {
            'nameko.language': 'en',
            'nameko.call_id_stack': ['srcservice.publish.0'],
        },
        'declare': publisher.declare,
        'serializer': publisher.serializer,
    })

    assert mock_producer.publish.call_args_list == [
        call('msg', **expected_kwargs)
    ]
class PublisherService(object):
    """ Test service whose entrypoint publishes its payload, routed by the
    name of the enclosing scope's ``queue``.
    """
    name = "publisher"

    publish = Publisher()

    @dummy
    def method(self, payload):
        """ Publish ``payload`` and return whatever ``publish`` returns. """
        routing_key = queue.name
        return self.publish(payload, routing_key=routing_key)
class Service(object):
    """ Test service that forwards its entrypoint arguments straight to the
    publisher.
    """
    name = "publisher"

    publish = Publisher()

    @dummy
    def method(self, *args, **kwargs):
        """ Pass every positional and keyword argument on to ``publish``;
        the result is discarded.
        """
        publish = self.publish
        publish(*args, **kwargs)
def test_unserialisable_headers(rabbit_manager, rabbit_config, mock_container):
    """ A context value of ``None`` is expected to be absent from the
    headers of the message that lands in the queue.
    """
    container = mock_container
    container.service_name = "service"
    container.config = rabbit_config
    container.spawn_managed_thread = eventlet.spawn

    service = Mock()
    worker_ctx = WorkerContext(
        container,
        service,
        DummyProvider('method'),
        data={'language': 'en', 'customheader': None},
    )

    unbound = Publisher(exchange=foobar_ex, queue=foobar_queue)
    publisher = unbound.bind(container, "publish")
    publisher.setup()
    publisher.start()

    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg")

    messages = rabbit_manager.get_messages(
        rabbit_config['vhost'], foobar_queue.name
    )
    received_headers = messages[0]['properties']['headers']
    assert received_headers == {
        'nameko.language': 'en',
        'nameko.call_id_stack': ['service.method.0'],
        # no `customheader`
    }
def test_headers(self, mock_container, producer):
    """ Headers provided at publish time are merged with any provided
    at instantiation time. Nameko headers are always present.
    """
    mock_container.config = {
        'AMQP_URI': 'memory://localhost'
    }
    mock_container.service_name = "service"

    # use a real worker context so nameko headers are generated
    service = Mock()
    entrypoint = Mock(method_name="method")
    worker_ctx = WorkerContext(
        mock_container, service, entrypoint, data={'context': 'data'}
    )

    nameko_headers = {
        'nameko.context': 'data',
        'nameko.call_id_stack': ['service.method.0'],
    }

    instantiation_value = {'foo': Mock()}
    publish_value = {'bar': Mock()}

    publisher = Publisher(
        **{'headers': instantiation_value}
    ).bind(mock_container, "publish")
    publisher.setup()

    publish = publisher.get_dependency(worker_ctx)

    def merge_dicts(base, *updates):
        """ Return a copy of ``base`` with each of ``updates`` applied in
        order; later mappings win on key clashes. ``base`` is not mutated.
        """
        merged = base.copy()
        # plain loop: `update` is called for its side effect only, so a
        # comprehension would just build a throwaway list of `None`
        for update in updates:
            merged.update(update)
        return merged

    publish("payload")
    assert producer.publish.call_args[1]['headers'] == merge_dicts(
        nameko_headers, instantiation_value
    )

    publish("payload", headers=publish_value)
    assert producer.publish.call_args[1]['headers'] == merge_dicts(
        nameko_headers, instantiation_value, publish_value
    )
def test_publish_custom_headers(mock_container, mock_producer,
                                mock_connection, rabbit_config):
    """ Every key of the worker context data, including a custom one, is
    expected in the published headers under a ``nameko.`` prefix,
    alongside the publisher-class defaults.
    """
    container = mock_container
    container.config = rabbit_config
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")
    publisher.setup()

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    # options expected to be taken verbatim from the publisher class
    class_defaults = (
        'retry', 'retry_policy', 'compression', 'mandatory',
        'expiration', 'delivery_mode', 'priority',
    )
    expected_kwargs = {
        option: getattr(publisher.publisher_cls, option)
        for option in class_defaults
    }
    expected_kwargs.update({
        'publish_kwarg': "value",
        'exchange': foobar_ex,
        'headers': {
            'nameko.language': 'en',
            'nameko.customheader': 'customvalue',
            'nameko.call_id_stack': ['srcservice.method.0'],
        },
        'declare': publisher.declare,
        'serializer': publisher.serializer,
    })

    assert mock_producer.publish.call_args_list == [
        call('msg', **expected_kwargs)
    ]
class Service(object):
    """ Test service that records each call with the enclosing scope's
    ``tracker`` and publishes to ``test_queue`` using the enclosing
    scope's ``retry`` setting.
    """
    name = "publish"

    publish = Publisher()

    @dummy
    def send(self, payload):
        """ Record the call, then publish ``payload``. """
        tracker("send", payload)
        publish_options = {'routing_key': "test_queue", 'retry': retry}
        self.publish(payload, **publish_options)
def test_publish_custom_headers(
    mock_container, mock_producer, rabbit_config
):
    """ Every key of the worker context data, including a custom one, is
    expected in the published headers under a ``nameko.`` prefix,
    alongside the publisher-class defaults.
    """
    container = mock_container
    container.config = rabbit_config
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    publisher = Publisher(exchange=foobar_ex).bind(container, "publish")
    publisher.setup()

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    # options expected to be taken verbatim from the publisher class
    class_defaults = (
        'retry', 'retry_policy', 'compression', 'mandatory',
        'expiration', 'delivery_mode', 'priority',
    )
    expected_kwargs = {
        option: getattr(publisher.publisher_cls, option)
        for option in class_defaults
    }
    expected_kwargs.update({
        'publish_kwarg': "value",
        'exchange': foobar_ex,
        'headers': {
            'nameko.language': 'en',
            'nameko.customheader': 'customvalue',
            'nameko.call_id_stack': ['srcservice.method.0'],
        },
        'declare': publisher.declare,
        'serializer': publisher.serializer,
    })

    assert mock_producer.publish.call_args_list == [
        call('msg', **expected_kwargs)
    ]
class WorkerSubscriber:
    """ Service that handles the ``say_hello`` event from the ``api``
    service and publishes a reply to the ``test`` direct exchange.
    """
    name = 'worker_subscriber'

    test = Exchange('test', type='direct')
    publish = Publisher(exchange=test)

    @event_handler("api", "say_hello")
    def handle_event(self, payload):
        """ Print a greeting for ``payload``, then publish a farewell. """
        greeting = "{0} said hello!".format(payload)
        print(greeting)

        farewell = "Goodbye {0}".format(payload)
        self.publish(farewell)
def test_publish_custom_headers(empty_config, maybe_declare, patch_publisher):
    """ With a ``CustomWorkerContext``, the custom context key is expected
    in the published headers under a ``nameko.`` prefix.
    """
    container = Mock(spec=ServiceContainer)
    container.service_name = "srcservice"
    container.config = empty_config

    service = Mock()
    worker_ctx = CustomWorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # replace the connection / producer factories with context-managed mocks
    connection = Mock()
    producer = Mock()
    get_connection, get_producer = patch_publisher(publisher)
    get_connection.return_value = as_context_manager(connection)
    get_producer.return_value = as_context_manager(producer)

    # declarations
    publisher.setup()
    maybe_declare.assert_called_once_with(foobar_queue, connection)

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    expected_kwargs = dict(
        headers={
            'nameko.language': 'en',
            'nameko.customheader': 'customvalue',
            'nameko.call_id_stack': ['srcservice.method.0'],
        },
        exchange=foobar_ex,
        retry=True,
        retry_policy=DEFAULT_RETRY_POLICY,
        publish_kwarg="value",
    )
    producer.publish.assert_called_once_with("msg", **expected_kwargs)
def test_unserialisable_headers(rabbit_manager, rabbit_config, mock_container):
    """ A context value of ``None`` is expected to raise a ``UserWarning``
    and to be absent from the headers of the message that lands in the
    queue.
    """
    container = mock_container
    container.service_name = "service"
    container.config = rabbit_config
    container.spawn_managed_thread = eventlet.spawn

    service = Mock()
    worker_ctx = WorkerContext(
        container,
        service,
        DummyProvider('method'),
        data={'language': 'en', 'customheader': None},
    )

    unbound = Publisher(exchange=foobar_ex, declare=[foobar_queue])
    publisher = unbound.bind(container, "publish")
    publisher.setup()
    publisher.start()

    # NOTE(review): the collapsed original does not show where the `with`
    # body ends; this assumes only `get_dependency` sits inside it — confirm
    with pytest.warns(UserWarning):
        service.publish = publisher.get_dependency(worker_ctx)

    service.publish("msg")

    messages = rabbit_manager.get_messages(
        rabbit_config['vhost'], foobar_queue.name
    )
    received_headers = messages[0]['properties']['headers']
    assert received_headers == {
        'nameko.language': 'en',
        'nameko.call_id_stack': ['service.method.0'],
        # no `customheader`
    }
def test_unserialisable_headers(rabbit_manager, rabbit_config):
    """ A context value of ``None`` is expected to be absent from the
    headers of the message that lands in the queue.
    """
    container = Mock(spec=ServiceContainer)
    container.service_name = "service"
    container.config = rabbit_config
    container.spawn_managed_thread = eventlet.spawn

    service = Mock()
    worker_ctx = CustomWorkerContext(
        container,
        service,
        DummyProvider("method"),
        data={"language": "en", "customheader": None},
    )

    unbound = Publisher(exchange=foobar_ex, queue=foobar_queue)
    publisher = unbound.bind(container, "publish")
    publisher.setup()
    publisher.start()

    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg")

    messages = rabbit_manager.get_messages(
        rabbit_config["vhost"], foobar_queue.name
    )
    received_headers = messages[0]["properties"]["headers"]
    assert received_headers == {
        "nameko.language": "en",
        "nameko.call_id_stack": ["service.method.0"],
        # no `customheader`
    }
class PublishService:
    """ Service that publishes commute-time requests and prints the
    replies it consumes.
    """
    name = "publish"

    # publisher on the commute-times exchange; also declares the reply queue
    commute_time_publisher = Publisher(
        exchange=exchange_commute_times,
        declare=[queue_consume_commute_times],
    )
    # publisher on the nameko RPC exchange with no extra declarations
    nameko_rpc_publisher = Publisher(
        exchange=exchange_nameko_rpc,
        declare=[],
    )

    @rpc
    def publish_commute_times(self, payload):
        """ Publish ``payload`` with the get-commute-time routing key and
        the consume-commute-time key as ``reply_to``.
        """
        publish_options = {
            'routing_key': ROUTING_KEY_GET_COMMUTE_TIME_NAME,
            'reply_to': ROUTING_KEY_CONSUME_COMMUTE_TIME_NAME,
        }
        self.commute_time_publisher(payload, **publish_options)

    @consume_commute_time_reply(prefetch_count=1)
    def handle_commute_time_reply(self, payload):
        """ Print the consumed reply payload. """
        print(payload)
def test_publish_custom_headers(mock_container, maybe_declare, patch_publisher):
    """ Every key of the worker context data, including a custom one, is
    expected in the published headers under a ``nameko.`` prefix.
    """
    container = mock_container
    container.service_name = "srcservice"

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    publisher = Publisher(queue=foobar_queue).bind(container, "publish")

    # replace the connection / producer factories with context-managed mocks
    connection = Mock()
    producer = Mock()
    get_connection, get_producer = patch_publisher(publisher)
    get_connection.return_value = as_context_manager(connection)
    get_producer.return_value = as_context_manager(producer)

    # declarations
    publisher.setup()
    maybe_declare.assert_called_once_with(foobar_queue, connection)

    # publish
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg", publish_kwarg="value")

    expected_kwargs = dict(
        headers={
            'nameko.language': 'en',
            'nameko.customheader': 'customvalue',
            'nameko.call_id_stack': ['srcservice.method.0'],
        },
        exchange=foobar_ex,
        retry=True,
        serializer=container.serializer,
        retry_policy=DEFAULT_RETRY_POLICY,
        publish_kwarg="value",
    )
    producer.publish.assert_called_once_with("msg", **expected_kwargs)
class Service4:
    """
    Microservice responsible for receiving data notifications from Service3
    and dispatching those data to the Client.

    Attributes:
        name (str): The microservice name.
        _redis (Redis): Nameko Redis connector object.
        _publication (Exchange): Messaging exchange object.
        _publish (Publisher): Messaging publisher object.
    """
    name = 'service4'
    _redis = Redis('my_redis')
    _publication = Exchange('new_publication', type='direct')
    _publish = Publisher(exchange=_publication)

    @event_handler('service3', 'number_published')
    def receive_publication(self, payload: str):
        """
        Event handler receiving a number published by service3 and passing
        it on to ``dispatch_publication``.

        Args:
            payload (str): A new number published according to service3
                rules.
        """
        self.dispatch_publication(payload)

    @rpc
    def dispatch_publication(self, payload: str):
        """
        Publish the passed payload on the ``new_publication`` exchange.

        Args:
            payload (str): A published number to be notified to the client.
        """
        self._publish(payload)

    @rpc
    def get_history(self) -> List[str]:
        """
        Get the last 100 publications from the Redis database.

        Returns:
            List[str]: Up to 100 of the most recent entries of the
                ``published_numbers`` list, oldest first.
        """
        # LRANGE clamps out-of-range indexes, so a start of -100 on a shorter
        # list simply yields the whole list. A single call avoids the extra
        # LLEN round trip and the race between reading the length and the
        # range.
        return self._redis.lrange('published_numbers', -100, -1)
def test_publish_to_rabbit(rabbit_manager, rabbit_config, mock_container):
    """ Against a real broker: starting the publisher creates the exchange,
    queue and binding, and a published message arrives in the queue with
    the prefixed context headers.
    """
    vhost = rabbit_config['vhost']

    container = mock_container
    container.service_name = "service"
    container.config = rabbit_config

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    unbound = Publisher(exchange=foobar_ex, queue=foobar_queue)
    publisher = unbound.bind(container, "publish")
    publisher.setup()
    publisher.start()

    # queue, exchange and binding created in rabbit
    exchanges = rabbit_manager.get_exchanges(vhost)
    queues = rabbit_manager.get_queues(vhost)
    bindings = rabbit_manager.get_queue_bindings(vhost, foobar_queue.name)

    assert any(entry['name'] == "foobar_ex" for entry in exchanges)
    assert any(entry['name'] == "foobar_queue" for entry in queues)
    assert any(entry['source'] == "foobar_ex" for entry in bindings)

    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg")

    # message published to queue
    messages = rabbit_manager.get_messages(vhost, foobar_queue.name)
    payloads = [message['payload'] for message in messages]
    assert ['"msg"'] == payloads

    # message headers
    received_headers = messages[0]['properties']['headers']
    assert received_headers == {
        'nameko.language': 'en',
        'nameko.customheader': 'customvalue',
        'nameko.call_id_stack': ['service.method.0'],
    }
def test_publish_to_rabbit(rabbit_manager, rabbit_config, mock_container):
    """ Against a real broker: starting a publisher that declares the queue
    creates the exchange, queue and binding, and a published message
    arrives in the queue with the prefixed context headers.
    """
    vhost = rabbit_config['vhost']

    container = mock_container
    container.service_name = "service"
    container.config = rabbit_config

    service = Mock()
    worker_ctx = WorkerContext(
        container, service, DummyProvider('method'),
        data={'language': 'en', 'customheader': 'customvalue'},
    )

    unbound = Publisher(exchange=foobar_ex, declare=[foobar_queue])
    publisher = unbound.bind(container, "publish")
    publisher.setup()
    publisher.start()

    # queue, exchange and binding created in rabbit
    exchanges = rabbit_manager.get_exchanges(vhost)
    queues = rabbit_manager.get_queues(vhost)
    bindings = rabbit_manager.get_queue_bindings(vhost, foobar_queue.name)

    assert any(entry['name'] == "foobar_ex" for entry in exchanges)
    assert any(entry['name'] == "foobar_queue" for entry in queues)
    assert any(entry['source'] == "foobar_ex" for entry in bindings)

    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg")

    # message published to queue
    messages = rabbit_manager.get_messages(vhost, foobar_queue.name)
    payloads = [message['payload'] for message in messages]
    assert ['"msg"'] == payloads

    # message headers
    received_headers = messages[0]['properties']['headers']
    assert received_headers == {
        'nameko.language': 'en',
        'nameko.customheader': 'customvalue',
        'nameko.call_id_stack': ['service.method.0'],
    }
def test_publish_to_rabbit(rabbit_manager, rabbit_config):
    """ Against a real broker: starting the publisher creates the exchange,
    queue and binding, and a published message arrives in the queue with
    the prefixed context headers.
    """
    vhost = rabbit_config["vhost"]

    container = Mock(spec=ServiceContainer)
    container.service_name = "service"
    container.config = rabbit_config
    container.spawn_managed_thread = eventlet.spawn

    service = Mock()
    worker_ctx = CustomWorkerContext(
        container, service, DummyProvider("method"),
        data={"language": "en", "customheader": "customvalue"},
    )

    unbound = Publisher(exchange=foobar_ex, queue=foobar_queue)
    publisher = unbound.bind(container, "publish")

    # queue, exchange and binding created in rabbit
    publisher.setup()
    publisher.start()

    exchanges = rabbit_manager.get_exchanges(vhost)
    queues = rabbit_manager.get_queues(vhost)
    bindings = rabbit_manager.get_queue_bindings(vhost, foobar_queue.name)

    assert any(entry["name"] == "foobar_ex" for entry in exchanges)
    assert any(entry["name"] == "foobar_queue" for entry in queues)
    assert any(entry["source"] == "foobar_ex" for entry in bindings)

    # message published to queue
    service.publish = publisher.get_dependency(worker_ctx)
    service.publish("msg")

    messages = rabbit_manager.get_messages(vhost, foobar_queue.name)
    payloads = [message["payload"] for message in messages]
    assert ["msg"] == payloads

    # message headers
    received_headers = messages[0]["properties"]["headers"]
    assert received_headers == {
        "nameko.language": "en",
        "nameko.customheader": "customvalue",
        "nameko.call_id_stack": ["service.method.0"],
    }
class OptaCollectorService(object):
    """Collect Opta soccer (f1/f9/f40) and rugby (ru1/ru7) feeds.

    Calendars (f1/ru1) are stored in the database; game feeds (f9/ru7) and
    squads (f40) are fetched through the ``opta`` dependency, serialized and
    published on the ``all_inputs`` exchange. Once a loader confirms a feed
    was loaded, its checksum is stored and a notification is published on
    the ``all_notifications`` exchange.
    """
    name = 'opta_collector'

    database = MongoDatabase(result_backend=False)
    opta = OptaDependency()
    error = ErrorHandler()
    pub_input = Publisher(exchange=Exchange(name='all_inputs',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))
    pub_notif = Publisher(exchange=Exchange(name='all_notifications',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))

    @staticmethod
    def _checksum(game):
        """Return the MD5 hex digest of the game's player stat values.

        Stats are ordered by ``(player_id, type)`` so the digest does not
        depend on the order in which the feed lists them.
        """
        stats = sorted(game['player_stats'],
                       key=lambda k: (k['player_id'], k['type']))
        concat = ''.join(str(r['value']) for r in stats)
        return hashlib.md5(concat.encode('utf-8')).hexdigest()

    @staticmethod
    def _build_soccer_game_event_content(game):
        """Build the ``content`` dict of a soccer game event.

        Raises ``IndexError`` when the game has no 'Home' or 'Away' side in
        its team stats, or when a side's team is not listed in ``teams``.
        """
        team_sides = list(
            set([(r['team_id'], r['side']) for r in game['team_stats']]))
        home_team_id = [r[0] for r in team_sides if r[1] == 'Home'][0]
        away_team_id = [r[0] for r in team_sides if r[1] == 'Away'][0]
        home_team = [
            r['name'] for r in game['teams'] if r['id'] == home_team_id
        ][0]
        away_team = [
            r['name'] for r in game['teams'] if r['id'] == away_team_id
        ][0]
        return {
            'venue': game['venue']['name'],
            'country': game['venue']['country'],
            'competition': game['competition']['name'],
            'season': game['season']['name'],
            'name': '{} - {}'.format(home_team, away_team)
        }

    @staticmethod
    def _handle_referential_soccer_entity(entity, _type):
        """Format one soccer feed entity as a referential entity.

        ``_type`` is the key of the game document the entity came from.
        Anything other than competition/venue/season/teams is handled as a
        person: its ``known`` name, when set, replaces "first last".
        """
        entity_formatted = {'id': entity['id'], 'provider': 'opta_f9'}
        if _type in ('competition', 'venue', 'season', 'teams'):
            entity_formatted['common_name'] = entity['name']
            if _type in ('competition', 'venue', 'season'):
                entity_formatted['type'] = ' '.join(['soccer', _type])
            else:
                entity_formatted['type'] = 'soccer team'
        else:
            common_name = ' '.join([entity['first_name'], entity['last_name']])
            if entity['known'] is not None:
                common_name = entity['known']
            entity_formatted['common_name'] = common_name
            entity_formatted['type'] = ' '.join(['soccer', entity['type']])
        return entity_formatted

    @staticmethod
    def _extract_referential_from_soccer_game(game):
        """Return the entities, events and labels described by a soccer game."""
        events = list()
        entities = list()
        labels = list()

        event_content = OptaCollectorService._build_soccer_game_event_content(
            game)
        event = {
            'id': game['match_info']['id'],
            'date': game['match_info']['date'].isoformat(),
            'provider': 'opta_f9',
            'type': 'game',
            'common_name': event_content['name'],
            'content': event_content,
            'entities': []
        }
        labels.append({'id': event['id'], 'label': event['common_name']})

        for k in ('competition', 'season', 'venue', 'persons', 'teams'):
            if k not in game:
                continue
            current_entity = game[k]
            # A key holds either a single entity or a list of them.
            if not isinstance(current_entity, list):
                current_entity = [current_entity]
            for r in current_entity:
                entity = OptaCollectorService._handle_referential_soccer_entity(
                    r, k)
                entities.append(entity)
                event['entities'].append(entity)
                labels.append({
                    'id': entity['id'],
                    'label': entity['common_name']
                })

        events.append(event)
        return {'entities': entities, 'events': events, 'labels': labels}

    @staticmethod
    def _extract_referential_from_rugby_game(ru1, ru7):
        """Return the entities, events and labels described by a rugby game.

        ``ru1`` is the calendar entry of the game (competition, venue, date,
        team names); ``ru7`` is the game feed (teams and players).
        """
        events = list()
        entities = list()
        labels = list()

        competition = {
            'id': ru1['competition_id'],
            'common_name': ru1['competition_name'],
            'provider': 'opta_ru7',
            'type': 'rugby competition',
            'informations': None
        }
        venue = {
            'id': ru1['venue_id'],
            'common_name': ru1['venue'],
            'provider': 'opta_ru7',
            'type': 'rugby venue',
            'informations': None
        }
        entities.append(venue)
        entities.append(competition)
        labels.append({
            'id': competition['id'],
            'label': competition['common_name']
        })
        labels.append({'id': venue['id'], 'label': venue['common_name']})

        event = {
            'id': ru1['id'],
            'date': ru1['date'].isoformat(),
            'common_name': '{} - {}'.format(ru1['home_name'],
                                            ru1['away_name']),
            'type': 'game',
            'provider': 'opta_ru7',
            'content': {
                'competition': ru1['competition_name'],
                'name': '{} - {}'.format(ru1['home_name'], ru1['away_name']),
                'venue': ru1['venue']
            },
            'entities': [venue, competition]
        }
        labels.append({'id': event['id'], 'label': event['common_name']})

        # Teams first, then players: the order is kept in the output lists.
        for feed_key, entity_type in (('teams', 'rugby team'),
                                      ('players', 'rugby player')):
            for t in ru7[feed_key]:
                entity = {
                    'id': t['id'],
                    'common_name': t['name'],
                    'provider': 'opta_ru7',
                    'type': entity_type,
                    'informations': None
                }
                event['entities'].append(entity)
                entities.append(entity)
                labels.append({
                    'id': entity['id'],
                    'label': entity['common_name']
                })

        events.append(event)
        return {'entities': entities, 'events': events, 'labels': labels}

    @staticmethod
    def _get_opta_meta(opta_type, data_type, game_id):
        """Return a copy of the datastore meta for ``opta_type``/``data_type``.

        When the meta declares ``delete_keys`` they are replaced with the
        match id (prefixed with ``f`` for f9 feeds).
        """
        meta = OPTA[opta_type][data_type].copy()
        if 'delete_keys' in meta:
            meta['delete_keys'] = {
                'match_id': f'f{game_id}' if opta_type == 'f9' else game_id
            }
        return meta

    @rpc
    def add_f1(self, season_id, competition_id):
        """Fetch a soccer calendar and upsert each of its games into ``f1``."""
        calendar = self.opta.get_soccer_calendar(season_id, competition_id)
        self.database['f1'].create_index('id')
        self.database['f1'].create_index('date')
        for row in calendar:
            self.database['f1'].update_one({'id': row['id']}, {'$set': row},
                                           upsert=True)

    @rpc
    def add_ru1(self, season_id, competition_id):
        """Fetch a rugby calendar and upsert each of its games into ``ru1``."""
        calendar = self.opta.get_rugby_calendar(season_id, competition_id)
        self.database['ru1'].create_index('id')
        self.database['ru1'].create_index('date')
        for row in calendar:
            self.database['ru1'].update_one({'id': row['id']}, {'$set': row},
                                            upsert=True)

    @timer(interval=24 * 60 * 60)
    @rpc
    def update_all_f1(self):
        """Refresh every soccer calendar already known in ``f1``.

        Calendars that cannot be retrieved are skipped.
        """
        _log.info('Updating all f1 files ...')
        calendars = self.database.f1.aggregate([{
            "$group": {
                "_id": {
                    "season_id": "$season_id",
                    "competition_id": "$competition_id"
                },
            }
        }])
        for row in calendars:
            try:
                calendar = self.opta.get_soccer_calendar(
                    row['_id']['season_id'], row['_id']['competition_id'])
            except OptaWebServiceError:
                continue
            for game in calendar:
                self.database.f1.update_one({'id': game['id']},
                                            {'$set': game},
                                            upsert=True)

    @timer(interval=24 * 60 * 60)
    @rpc
    def update_all_ru1(self):
        """Refresh every rugby calendar already known in ``ru1``.

        Calendars that cannot be retrieved are skipped.
        """
        _log.info('Updating all RU1 files ...')
        calendars = self.database.ru1.aggregate([{
            "$group": {
                "_id": {
                    "season_id": "$season_id",
                    "competition_id": "$competition_id"
                },
            }
        }])
        for row in calendars:
            try:
                calendar = self.opta.get_rugby_calendar(
                    row['_id']['season_id'], row['_id']['competition_id'])
            except OptaWebServiceError:
                continue
            for game in calendar:
                self.database.ru1.update_one({'id': game['id']},
                                             {'$set': game},
                                             upsert=True)

    def get_soccer_ids_by_dates(self, start_date, end_date):
        """List soccer games with ``start_date <= date < end_date``.

        Dates are strings parsed with ``dateutil``; each result holds the
        game ``id``, ``competition_id`` and ``season_id``.
        """
        start = dateutil.parser.parse(start_date)
        end = dateutil.parser.parse(end_date)
        ids = self.database.f1.find({'date': {
            '$gte': start,
            '$lt': end
        }}, {
            'id': 1,
            'competition_id': 1,
            'season_id': 1,
            '_id': 0
        })
        return list(ids)

    def get_soccer_ids_by_season_and_competition(self, season_id,
                                                 competition_id):
        """List the ids of the soccer games of a season/competition."""
        ids = self.database.f1.find(
            {
                'season_id': season_id,
                'competition_id': competition_id
            }, {
                'id': 1,
                '_id': 0
            })
        return [r['id'] for r in ids]

    def get_f1(self, game_id):
        """Return the stored soccer calendar entry of a game, or ``None``."""
        game = self.database.f1.find_one({'id': game_id}, {'_id': 0})
        return game

    def get_rugby_ids_by_dates(self, start_date, end_date):
        """List rugby games with ``start_date <= date < end_date``.

        Dates are strings parsed with ``dateutil``; each result holds the
        game ``id``, ``competition_id`` and ``season_id``.
        """
        start = dateutil.parser.parse(start_date)
        end = dateutil.parser.parse(end_date)
        ids = self.database.ru1.find({'date': {
            '$gte': start,
            '$lt': end
        }}, {
            'id': 1,
            'competition_id': 1,
            'season_id': 1,
            '_id': 0
        })
        return list(ids)

    def get_rugby_ids_by_season_and_competition(self, season_id,
                                                competition_id):
        """List the ids of the rugby games of a season/competition."""
        ids = self.database.ru1.find(
            {
                'season_id': season_id,
                'competition_id': competition_id
            }, {
                'id': 1,
                '_id': 0
            })
        return [r['id'] for r in ids]

    def get_ru1(self, game_id):
        """Return the stored rugby calendar entry of a game, or ``None``."""
        game = self.database.ru1.find_one({'id': game_id}, {'_id': 0})
        return game

    def get_f9(self, match_id):
        """Fetch a soccer game feed and build its input message.

        Returns ``None`` when the feed is empty. ``status`` is CREATED when
        no checksum is stored for the match, UPDATED when the stored one
        differs from the freshly computed one, UNCHANGED otherwise.
        """
        self.database.f9.create_index('id')
        game = self.opta.get_soccer_game(match_id)
        if not game:
            return None

        checksum = self._checksum(game)
        old_checksum = self.database.f9.find_one({'id': match_id}, {
            'checksum': 1,
            '_id': 0
        })
        if old_checksum is None:
            status = 'CREATED'
        elif old_checksum['checksum'] != checksum:
            status = 'UPDATED'
        else:
            status = 'UNCHANGED'

        referential = self._extract_referential_from_soccer_game(game)
        datastore = [{
            **self._get_opta_meta('f9', 'playerstat', match_id),
            'records': game['player_stats']
        }, {
            **self._get_opta_meta('f9', 'teamstat', match_id),
            'records': game['team_stats']
        }, {
            **self._get_opta_meta('f9', 'event', match_id),
            'records': game['events']
        }, {
            **self._get_opta_meta('f9', 'matchinfo', match_id),
            'records': [game['match_info']]
        }, {
            **LABEL,
            'records': referential['labels']
        }]
        return {
            'id': match_id,
            'status': status,
            'checksum': checksum,
            'referential': {
                k: referential[k]
                for k in ('entities', 'events') if k in referential
            },
            'datastore': datastore,
            'meta': {
                'type': 'f9',
                'source': 'opta',
                'content_id': f'f{match_id}'
            }
        }

    def get_ru7(self, match_id):
        """Fetch a rugby game feed and build its input message.

        Returns ``None`` when the feed is empty. The status is computed the
        same way as for soccer feeds. The calendar entry of the match is
        read from ``ru1`` to complete the referential and the match info.
        """
        self.database.ru7.create_index('id')
        game = self.opta.get_rugby_game(match_id)
        if not game:
            return None

        checksum = self._checksum(game)
        old_checksum = self.database.ru7.find_one({'id': match_id}, {
            'checksum': 1,
            '_id': 0
        })
        if old_checksum is None:
            status = 'CREATED'
        elif old_checksum['checksum'] != checksum:
            status = 'UPDATED'
        else:
            status = 'UNCHANGED'

        ru1 = self.database.ru1.find_one({'id': match_id}, {'_id': 0})
        referential = self._extract_referential_from_rugby_game(ru1=ru1,
                                                                ru7=game)
        datastore = [{
            **self._get_opta_meta('ru7', 'playerstat', match_id),
            'records': game['player_stats']
        }, {
            **self._get_opta_meta('ru7', 'teamstat', match_id),
            'records': game['team_stats']
        }, {
            **self._get_opta_meta('ru7', 'event', match_id),
            'records': game['events']
        }, {
            **self._get_opta_meta('ru7', 'matchscore', match_id),
            'records': [{
                'id': game['rrml']['id'],
                'attendance': game['rrml']['attendance'],
                'away_ht_score': game['rrml']['away_ht_score'],
                'away_score': game['rrml']['away_score'],
                'home_ht_score': game['rrml']['home_ht_score'],
                'home_score': game['rrml']['home_score']
            }]
        }, {
            **self._get_opta_meta('ru7', 'matchinfo', match_id),
            'records': [{
                'venue_id': ru1['venue_id'],
                'date': ru1['date'],
                'season_id': ru1['season_id'],
                'id': ru1['id'],
                'group_name': ru1['group_name'],
                'group_id': ru1['group_id'],
                'round': ru1['round'],
                'competition_id': ru1['competition_id']
            }]
        }, {
            **LABEL,
            'records': referential['labels']
        }]
        return {
            'id': match_id,
            'status': status,
            'checksum': checksum,
            'datastore': datastore,
            'referential': {
                k: referential[k]
                for k in ('entities', 'events') if k in referential
            },
            'meta': {
                'type': 'ru7',
                'source': 'opta'
            }
        }

    def get_f40(self, season_id, competition_id):
        """Fetch the soccer squads of a season/competition as an input message.

        Returns ``None`` when no squad is available. The message is always
        flagged UPDATED and carries no checksum.
        """
        squads = self.opta.get_soccer_squads(season_id, competition_id)
        if not squads:
            return None

        meta = OPTA['f40']

        def get_fields(k):
            return {m[0] for m in meta[k]['meta']}

        # Computed once instead of once per record key.
        player_fields = get_fields('playerinfo')
        team_fields = get_fields('teaminfo')
        team_info_fields = team_fields.union({'team_kits'})

        datastore = [
            {
                **meta['playerinfo'],
                'records': [{
                    k: v for k, v in p.items() if k in player_fields
                } for t in squads for p in t['players']]
            },
            {
                **meta['teaminfo'],
                'records': [{
                    k: v for k, v in t.items() if k in team_fields
                } for t in squads]
            },
            {
                **meta['link'],
                'records': [{
                    # Stable id of the player/team/season/competition link.
                    'id': hashlib.md5(''.join([
                        p['id'], t['id'], t['season_id'], t['competition_id']
                    ]).encode('utf-8')).hexdigest(),
                    'competition_id': t['competition_id'],
                    'season_id': t['season_id'],
                    'player_id': p['id'],
                    'team_id': t['id'],
                    'join_date': p['join_date']
                } for t in squads for p in t['players']]
            }
        ]
        content_id = ','.join([season_id, competition_id])
        return {
            'id': content_id,
            'status': 'UPDATED',
            'checksum': None,
            'referential': {
                'informations': list(
                    itertools.chain(datastore[0]['records'], [{
                        k: v for k, v in t.items() if k in team_info_fields
                    } for t in squads]))
            },
            'datastore': datastore,
            'meta': {
                'type': 'f40',
                'source': 'opta',
                'content_id': content_id
            }
        }

    def ack_f9(self, match_id, checksum):
        """Store the checksum of a loaded soccer game feed."""
        self.database.f9.update_one({'id': match_id},
                                    {'$set': {
                                        'checksum': checksum
                                    }},
                                    upsert=True)

    @rpc
    def unack_f9(self, match_id):
        """Forget the stored checksum of a soccer game feed."""
        self.database.f9.delete_one({'id': match_id})

    def ack_ru7(self, match_id, checksum):
        """Store the checksum of a loaded rugby game feed."""
        self.database.ru7.update_one({'id': match_id},
                                     {'$set': {
                                         'checksum': checksum
                                     }},
                                     upsert=True)

    @rpc
    def unack_ru7(self, match_id):
        """Forget the stored checksum of a rugby game feed."""
        self.database.ru7.delete_one({'id': match_id})

    @timer(interval=5 * 60)
    @rpc
    def publish(self, days_offset=3):
        """Publish the changed game feeds of the recent time window.

        The window runs from ``days_offset`` days ago to 110 minutes from
        now. Feeds whose status is UNCHANGED are not published. For every
        soccer competition with at least one published game, the squads
        (f40) are published as well.
        """
        _log.info(f'Loading opta games for the last {days_offset} days ...')
        now = datetime.datetime.utcnow()
        start = now - datetime.timedelta(days=days_offset)
        end = now + datetime.timedelta(seconds=60 * 110)
        _log.info(
            f'Retrieving game ids from {start.isoformat()} to {end.isoformat()} ...'
        )
        games = itertools.chain(
            [('soccer', i) for i in self.get_soccer_ids_by_dates(
                start.isoformat(), end.isoformat())],
            [('rugby', i) for i in self.get_rugby_ids_by_dates(
                start.isoformat(), end.isoformat())])

        def handle_game(game):
            # Returns True only when a feed was actually published.
            t, i = game
            try:
                feed = self.get_f9(i['id']) if t == 'soccer' else self.get_ru7(
                    i['id'])
            except OptaWebServiceError:
                _log.warning(f'Game {i["id"]} could not be retrieved!')
                return False
            if feed and feed['status'] != 'UNCHANGED':
                _log.info(f'Publishing {game} files ...')
                self.pub_input(bson.json_util.dumps(feed))
                return True
            return False

        def handle_competition(competition):
            t, comp, season = competition
            try:
                # Squads are only available for soccer.
                feed = self.get_f40(season, comp) if t == 'soccer' else None
            except OptaWebServiceError:
                _log.warning(
                    f'Competition {comp}/{season} could not be retrieved!')
                return
            if feed:
                _log.info(f'Publishing {comp}/{season} files ...')
                self.pub_input(bson.json_util.dumps(feed))

        competitions = set()
        for g in games:
            handled = handle_game(g)
            if handled:
                competitions.add(
                    (g[0], g[1]['competition_id'], g[1]['season_id']))

        for c in competitions:
            handle_competition(c)

    @event_handler('loader',
                   'input_loaded',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def ack(self, payload):
        """Acknowledge a loaded opta input and publish a notification.

        Messages without meta, without a type, or whose source is not
        ``opta`` are ignored. f9 and ru7 messages need a checksum to be
        acknowledged; f40 messages are only notified.
        """
        msg = bson.json_util.loads(payload)
        meta = msg.get('meta', None)
        if not meta:
            return
        checksum = msg.get('checksum', None)
        if 'type' not in meta or 'source' not in meta or meta[
                'source'] != 'opta':
            return
        t = meta['type']

        def publish_notification(t, game, id_):
            _log.info(f'Publishing notification for {id_}')
            self.pub_notif(
                bson.json_util.dumps({
                    'id': id_,
                    'source': 'opta',
                    'type': t,
                    'content': f'{game["home_name"]} - {game["away_name"]}'
                }))

        if t == 'f9':
            if checksum:
                _log.info(f'Acknowledging {t} file: {msg["id"]}')
                self.ack_f9(msg['id'], checksum)
                game = self.get_f1(msg['id'])
                publish_notification(t, game, msg['id'])
            else:
                _log.warning(
                    f'Received an event {t} {msg["id"]} without checksum')
        elif t == 'ru7':
            if checksum:
                _log.info(f'Acknowledging {t} file: {msg["id"]}')
                self.ack_ru7(msg['id'], checksum)
                game = self.get_ru1(msg['id'])
                publish_notification(t, game, msg['id'])
            else:
                _log.warning(
                    f'Received an event {t} {msg["id"]} without checksum')
        elif t == 'f40':
            _log.info(f'Acknowledging {t} file: {msg["id"]}')
            self.pub_notif(
                bson.json_util.dumps({
                    'id': msg['id'],
                    'source': 'opta',
                    'type': t,
                    'content': 'Squads loaded'
                }))
        else:
            return

    @event_handler('api_service',
                   'input_config',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def handle_input_config(self, payload):
        """Register a new f1 or ru1 calendar described by an input config.

        Messages whose source is not ``opta`` are ignored silently. A
        message with a missing type, config, season or competition is
        logged and ignored instead of failing with a ``KeyError``.
        """
        msg = bson.json_util.loads(payload)
        if 'meta' not in msg or 'source' not in msg[
                'meta'] or msg['meta']['source'] != 'opta':
            return

        if 'type' not in msg['meta']:
            _log.warning('type is missing within meta')
            return
        type_ = msg['meta']['type']

        _log.info('Received a related input config ...')
        if 'config' not in msg:
            _log.warning('No config within the message. Ignoring ...')
            return
        config = msg['config']

        if 'season' not in config or 'competition' not in config:
            _log.warning(
                'Either competition or season is missing within config')
            return

        if type_ == 'f1':
            self.add_f1(config['season'], config['competition'])
        elif type_ == 'ru1':
            self.add_ru1(config['season'], config['competition'])
        else:
            _log.warning('type should be either f1 or ru1')
            return

        self.pub_notif(
            bson.json_util.dumps({
                'id': ','.join([config['season'], config['competition']]),
                'source': msg['meta']['source'],
                'type': type_,
                'content': 'A new Opta feed has been added.'
            }))
class ProductsService:
    """HTTP and messaging entrypoints for products, orders and tax requests."""
    name = 'products'

    cache = Cache()
    config = Config()
    dispatch = EventDispatcher()
    order_product_publisher = Publisher(queue=order_queue)
    calculate_taxes_publisher = Publisher(exchange=orders_exchange)

    @http('GET', '/products/<int:product_id>')
    def get_product(self, request, product_id):
        """ Get product from local cache

        Returns a 404 with a JSON error body when the product is not cached.
        """
        product = self.cache.get(product_id)
        if not product:
            return 404, json.dumps({
                'error': 'NOT_FOUND',
                'message': 'Product not found'
            })
        # Serialize the product fetched above rather than reading the cache
        # a second time: the entry could disappear between the two reads.
        return Product(strict=True).dumps(product).data

    @http('POST', '/products')
    def add_product(self, request):
        """ Add product to cache in every region

        This endpoint can be called in any region and will dispatch event
        which will be handled by indexer's `handle_product_added`
        in all regions

        Returns a 400 with the validation messages when the body is invalid.
        """
        try:
            payload = Product(strict=True).loads(
                request.get_data(as_text=True)
            ).data
        except ValidationError as err:
            return 400, json.dumps({
                'error': 'BAD_REQUEST',
                'message': err.messages
            })
        self.dispatch('product_added', Product(strict=True).dump(payload).data)
        return 200, ''

    @http('POST', '/orders')
    def order_product(self, request):
        """ HTTP entrypoint for ordering products

        This entrypoint can be called in any region but message will be
        published on a federated `fed.order_product` queue that is only
        consumed in `europe` region where master database and service with
        write permissions to it lives.

        Returns a 400 with the validation messages when the body is invalid.
        """
        try:
            payload = Order(strict=True).loads(
                request.get_data(as_text=True)
            ).data
        except ValidationError as err:
            return 400, json.dumps({
                'error': 'BAD_REQUEST',
                'message': err.messages
            })

        self.order_product_publisher(
            payload, routing_key=ROUTING_KEY_ORDER_PRODUCT
        )
        return 200, ''

    @consume(queue=order_queue)
    def consume_order(self, payload):
        """ Consumes order payloads

        For our example, this consumer is only enabled in `europe` region.
        `asia` and `america` regions have this consumer disabled by setting
        `ENTRYPOINT_BLACKLIST` config value to `consume_order`.
        Custom implementation of ServiceContainer (container.py) uses this
        value to blacklist specific entrypoints.
        """
        logging.info("Consuming order")
        product = self.cache.get(payload['product_id'])
        product['quantity'] -= payload['quantity']

        # Write to master database here...

        self.dispatch(
            'product_updated', Product(strict=True).dump(product).data
        )

    @http('POST', '/tax/<string:remote_region>')
    def calculate_tax(self, request, remote_region):
        """ Send tax calculation request to desired region.

        The routing key targets `remote_region`; the reply-to key is built
        from this service's own `REGION` config value.
        """
        this_regions = self.config['REGION']
        payload = {'order_id': 1}
        self.calculate_taxes_publisher(
            payload,
            routing_key="{}_{}".format(
                remote_region, ROUTING_KEY_CALCULATE_TAXES
            ),
            reply_to="{}_{}".format(
                this_regions, ROUTING_KEY_CALCULATE_TAXES_REPLY
            )
        )
        return 200, ''

    @consume_reply()
    def consume_tax_calculation(self, payload):
        """ Consume tax calculation responses coming back from any regions. """
        logging.info(payload)
class ElectionCollectorService(object):
    """Collect French election results and publish them as loader inputs.

    Result documents are flattened into one record per nuance, list or
    candidate, each carrying the scrutin/departement/commune/tour values
    of its parents, and published on the ``all_inputs`` exchange.
    """
    name = 'election_collector'

    database = MongoDatabase(result_backend=False)
    election = Election()
    error = ErrorHandler()
    pub_input = Publisher(exchange=Exchange(name='all_inputs',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))
    pub_notif = Publisher(exchange=Exchange(name='all_notifications',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))

    def add_election(self, election_id):
        """Register an election id in the ``elections`` collection (upsert)."""
        self.database['elections'].update_one({'id': election_id},
                                              {'$set': {
                                                  'id': election_id
                                              }},
                                              upsert=True)

    @staticmethod
    def handle_missing_number(doc, key):
        """Return ``doc[key]`` as an int, or ``None`` when unavailable.

        The value is read from the nested ``Nombre`` entry when there is
        one. ``None`` is returned when the key is absent or when the value
        cannot be interpreted as an integer.
        """
        if key not in doc:
            return None
        d = doc[key]
        try:
            if 'Nombre' in d:
                d = d['Nombre']
            return int(d)
        except (TypeError, ValueError):
            # TypeError covers empty (None) values and mappings that have
            # no 'Nombre' entry; ValueError covers non-numeric text.
            return None

    @staticmethod
    def to_boolean(doc, key):
        """Return ``None`` if ``key`` is absent, else whether it equals 'O'."""
        if key not in doc:
            return None
        return doc[key] == 'O'

    @staticmethod
    def extract_scrutin(doc):
        """Extract the type and year of the scrutin."""
        _log.info('Handling scrutin informations ...')
        return {
            'scrutin_type': doc['Type'],
            'scrutin_annee': int(doc['Annee'])
        }

    @staticmethod
    def extract_commune(doc):
        """Extract the commune fields; optional ones default to ``None``."""
        _log.info('Handling commune informations ...')
        return {
            'commune_code': doc['CodSubCom'],
            'commune_lib': doc['LibSubCom'],
            'circonscription_code': doc.get('CodCirLg', None),
            'circonscription_lib': doc.get('LibFraSubCom', None),
            'mode_scrutin': doc.get('ModeScrutin', None)
        }

    @staticmethod
    def extract_tour(doc):
        """Extract the round number."""
        _log.info('Handling tour informations ...')
        return {'num_tour': int(doc['NumTour'])}

    @staticmethod
    def extract_mention(doc):
        """Extract the turnout counters of a round.

        Blank and null ballot counters are optional and default to ``None``.
        """
        _log.info('Handling mention informations ...')
        return {
            'inscrits': int(doc['Inscrits']['Nombre']),
            'abstentions': int(doc['Abstentions']['Nombre']),
            'votants': int(doc['Votants']['Nombre']),
            'blancs':
            ElectionCollectorService.handle_missing_number(doc, 'Blancs'),
            'nuls':
            ElectionCollectorService.handle_missing_number(doc, 'Nuls'),
            'blancs_nuls':
            ElectionCollectorService.handle_missing_number(
                doc, 'BlancsOuNuls'),
            'exprimes': int(doc['Exprimes']['Nombre'])
        }

    @staticmethod
    def extract_resultats(doc):
        """Extract the optional seat counters of a result block."""
        _log.info('Handling resultats informations ...')
        return {
            'nb_sap':
            ElectionCollectorService.handle_missing_number(doc, 'NbSap'),
            'nb_sp':
            ElectionCollectorService.handle_missing_number(
                doc, 'NbSiePourvus')
        }

    @staticmethod
    def extract_departement(doc):
        """Extract the code and label of the departement."""
        _log.info('Handling departement information ...')
        return {
            'departement_code': doc['CodDpt'],
            'departement_lib': doc['LibDpt']
        }

    @staticmethod
    def extract_nuance(doc):
        """Extract one nuance result."""
        _log.info('Handling nuance information ...')
        return {
            'nuance_code': doc['CodNua'],
            'nuance_lib': doc['LibNua'],
            'nb_voix': int(doc['NbVoix']),
            'nuance_nb_siege':
            ElectionCollectorService.handle_missing_number(doc, 'NbSieges')
        }

    @staticmethod
    def extract_candidat(doc):
        """Extract one candidate result."""
        _log.info('Handling candidat information ...')
        return {
            'candidat_numero':
            ElectionCollectorService.handle_missing_number(
                doc, 'NumPanneauCand'),
            'candidat_nom': doc['NomPsn'],
            'candidat_prenom': doc['PrenomPsn'],
            'candidat_civilite': doc['CivilitePsn'],
            'candidat_ordre':
            ElectionCollectorService.handle_missing_number(
                doc, 'NumeroOrdCand'),
            'candidat_elu': ElectionCollectorService.to_boolean(doc, 'Elu'),
            'nuance_code': doc.get('CodNua', None),
            'nuance_lib': doc.get('LibNua', None),
            'nb_voix': int(doc['NbVoix'])
        }

    @staticmethod
    def extract_liste(doc):
        """Extract one list result."""
        _log.info('Handling liste information ...')
        return {
            'liste_code': doc['CodSeqLisCand'],
            'liste_lib': doc['NomListe'],
            'liste_tete_nom': doc.get('NomTeteListe', None),
            'liste_tete_prenom': doc.get('PrenomTeteListe', None),
            'liste_tete_civilite': doc.get('CiviliteTeteListe', None),
            'liste_nb_elus':
            ElectionCollectorService.handle_missing_number(doc, 'NbSieges'),
            'nb_voix': int(doc['NbVoix'])
        }

    @staticmethod
    def complete_records(records, extract_func, rec_type, prev):
        """Turn raw result entries into complete datastore records.

        Each record is the output of ``extract_func`` updated with the
        parent-level values ``prev``, tagged with ``rec_type`` and padded
        with ``None`` for every META column it does not define.
        """
        columns = [meta[0] for meta in META]

        def create_record(r):
            c = extract_func(r)
            c.update(prev)
            c['type'] = rec_type
            for m in columns:
                if m not in c:
                    c[m] = None
            return c

        return [create_record(r) for r in records]

    @staticmethod
    def build_records(doc, er):
        """Flatten a result document into a list of datastore records.

        ``er`` provides ``url``, ``election_id`` and ``feed_id``. Raises
        ``ElectionCollectorError`` when a round holds none of Nuances,
        Listes or Candidats.
        """
        _log.info(f'Building data from {er.url}')
        election = doc['Election']
        root = ElectionCollectorService.extract_scrutin(election['Scrutin'])
        root['election_id'] = er.election_id
        root['feed_id'] = er.feed_id

        def ensure_list(d):
            # A lone child may be given as a single item instead of a list.
            if isinstance(d, list):
                return d
            return [d]

        def handle_tour(t, prev):
            t_lvl = ElectionCollectorService.extract_tour(t)
            t_lvl.update(prev)
            t_lvl.update(
                ElectionCollectorService.extract_mention(t['Mentions']))
            res = t['Resultats']
            t_lvl.update(ElectionCollectorService.extract_resultats(res))
            # Result entries are normalised the same way as Tour and Commune
            # so that a single entry is not iterated key by key.
            if 'Nuances' in res:
                return ElectionCollectorService.complete_records(
                    ensure_list(res['Nuances']['Nuance']),
                    ElectionCollectorService.extract_nuance, 'N', t_lvl)
            elif 'Listes' in res:
                return ElectionCollectorService.complete_records(
                    ensure_list(res['Listes']['Liste']),
                    ElectionCollectorService.extract_liste, 'L', t_lvl)
            elif 'Candidats' in res:
                return ElectionCollectorService.complete_records(
                    ensure_list(res['Candidats']['Candidat']),
                    ElectionCollectorService.extract_candidat, 'C', t_lvl)
            else:
                raise ElectionCollectorError(
                    'Cannot find neither Nuances, Listes nor Candidats under Resultats'
                )

        def handle_commune(c, prev):
            c_lvl = ElectionCollectorService.extract_commune(c)
            c_lvl.update(prev)
            return list(
                itertools.chain.from_iterable([
                    handle_tour(t, c_lvl)
                    for t in ensure_list(c['Tours']['Tour'])
                ]))

        if 'Departement' in election:
            root.update(
                ElectionCollectorService.extract_departement(
                    election['Departement']))
            return list(
                itertools.chain.from_iterable([
                    handle_commune(c, root) for c in ensure_list(
                        election['Departement']['Communes']['Commune'])
                ]))

        return list(
            itertools.chain.from_iterable([
                handle_tour(t, root)
                for t in ensure_list(election['Tours']['Tour'])
            ]))

    def update_checksum(self, id_, checksum):
        """Store ``checksum`` on the election document identified by ``id_``."""
        self.database['elections'].update_one({'id': id_},
                                              {'$set': {
                                                  'checksum': checksum
                                              }})

    @rpc
    def publish(self, election_id):
        """Publish one input message per result feed of an election.

        Feeds whose records cannot be built are logged and skipped.
        """
        _log.info(f'Publishing election {election_id} ...')
        for r in self.election.results(election_id):
            _log.info(f'Getting {r.url} ...')
            doc = r.call()
            try:
                records = ElectionCollectorService.build_records(doc, r)
            except ElectionCollectorError as e:
                _log.error(f'Error on {r.url}: {str(e)}')
                continue
            data = {
                'referential': {},
                'datastore': [{
                    'write_policy': 'delete_bulk_insert',
                    'meta': META,
                    'target_table': 'french_election',
                    'delete_keys': {
                        'feed_id': r.feed_id
                    },
                    'records': records,
                    'chunk_size': 100
                }],
                'id': r.feed_id,
                'status': 'CREATED',
                'checksum': None,
                'meta': {
                    'type': 'election',
                    'source': 'interieur',
                    'content_id': r.feed_id
                }
            }
            self.pub_input(bson.json_util.dumps(data))

    @event_handler('loader',
                   'input_loaded',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def ack(self, payload):
        """Publish a notification once an ``interieur`` input was loaded.

        Messages without meta, without a type, or from another source are
        ignored.
        """
        msg = bson.json_util.loads(payload)
        meta = msg.get('meta', None)
        if not meta:
            return
        if 'type' not in meta or 'source' not in meta or meta[
                'source'] != 'interieur':
            return
        self.pub_notif(
            bson.json_util.dumps({
                'id': msg['id'],
                'source': meta['source'],
                'type': meta['type'],
                'content': 'French election loaded!'
            }))

    @staticmethod
    def is_meta_valid(msg):
        """Tell whether ``msg`` is a usable election input config.

        It must be of type ``election`` and source ``interieur`` and carry
        a ``config`` holding an ``election`` entry.
        """
        if 'meta' not in msg:
            return False
        if 'type' not in msg['meta'] or 'source' not in msg['meta']:
            return False
        if msg['meta']['type'] != 'election' or msg['meta'][
                'source'] != 'interieur':
            return False
        if 'config' not in msg:
            _log.warning('Missing config within the message. Ignoring ...')
            return False
        if 'election' not in msg['config']:
            _log.error('Missing mandatory field: election')
            return False
        return True

    @event_handler('api_service',
                   'input_config',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def handle_input_config(self, payload):
        """Register the configured election and publish its results."""
        msg = bson.json_util.loads(payload)
        if not ElectionCollectorService.is_meta_valid(msg):
            return
        election_id = msg['config']['election']
        _log.info('Received a related input config ...')
        self.add_election(election_id)
        self.publish(election_id)
class SDMXCollectorService(object):
    """Collect SDMX dataflows and publish them as loader inputs.

    Registered dataflows are kept in the ``dataset`` collection. Each one is
    downloaded through the ``sdmx`` dependency, converted into a data table
    plus a codelist table, and published on the ``all_inputs`` exchange.
    """
    name = 'sdmx_collector'

    database = MongoDatabase(result_backend=False)
    sdmx = SDMX()
    error = ErrorHandler()
    pub_input = Publisher(exchange=Exchange(name='all_inputs',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))
    pub_notif = Publisher(exchange=Exchange(name='all_notifications',
                                            type='topic',
                                            durable=True,
                                            auto_delete=True,
                                            delivery_mode=PERSISTENT))

    def add_dataflow(self, root_url, agency_id, resource_id, version, kind,
                     keys):
        """Register (upsert) a dataflow and return its dataset id.

        Raises ``SDMXCollectorError`` when the SDMX dependency cannot be
        initialized with the given parameters.
        """
        try:
            self.sdmx.initialize(root_url, agency_id, resource_id, version,
                                 kind, keys)
        except Exception as e:
            # Keep the original exception as the cause for debugging.
            raise SDMXCollectorError(str(e)) from e

        self.database['dataset'].create_index([('agency', pymongo.ASCENDING),
                                               ('resource', pymongo.ASCENDING)
                                               ])
        self.database['dataset'].create_index('id')
        _id = SDMXCollectorService.table_name(agency_id, resource_id)
        doc = {
            'agency': agency_id,
            'resource': resource_id,
            'id': _id,
            'version': version,
            'kind': kind,
            'keys': keys or {},
            'root_url': root_url
        }
        self.database['dataset'].update_one(
            {
                'agency': agency_id,
                'resource': resource_id
            }, {'$set': doc},
            upsert=True)
        return _id

    def get_dataflows(self):
        """Return a cursor over every registered dataflow."""
        return self.database['dataset'].find({})

    @staticmethod
    def clean(l):
        """Replace each run of characters outside ``[0-9a-zA-Z_]`` by ``_``."""
        return re.sub(r'[^0-9a-zA-Z_]+', '_', l)

    @staticmethod
    def table_name(provider, dataflow):
        """Return the table name ``<provider>_<dataflow>``, lowercased and cleaned."""
        return f'{SDMXCollectorService.clean(provider.lower())}_{SDMXCollectorService.clean(dataflow.lower())}'

    @staticmethod
    def to_table_meta(meta, provider, dataflow):
        """Build the datastore meta of the data table of a dataflow.

        Dimensions and attributes that reference a codelist get a VARCHAR
        column: sized by the longest code for a dimension, by the sum of
        the code lengths for an attribute. They get a TEXT column when no
        code is found for the codelist. Entries without a codelist
        reference are left out.
        """
        table_name = SDMXCollectorService.table_name(provider, dataflow)
        codelist = meta['codelist']

        def handle_dim_att(d, is_dim=True):
            name, code = d
            cl = [c for c in codelist if c[0] == code]
            if not cl:
                return (SDMXCollectorService.clean(name), 'TEXT')
            code_lengths = [len(c[1]) for c in cl]
            if is_dim:
                # Longest code of the codelist, never below 1.
                size = max([1, *code_lengths])
            else:
                size = sum(code_lengths)
            return (SDMXCollectorService.clean(name), f'VARCHAR({size})')

        table_meta = [handle_dim_att(d) for d in meta['dimensions'] if d[1]]
        table_meta = table_meta + [
            handle_dim_att(d, is_dim=False) for d in meta['attributes']
            if d[1]
        ]
        table_meta.append((SDMXCollectorService.clean(meta['time_dimension']),
                           'VARCHAR(20)'))
        table_meta.append(
            (SDMXCollectorService.clean(meta['primary_measure']), 'FLOAT'))
        table_meta.append(('query', 'VARCHAR(250)'))

        return {
            'write_policy': 'delete_bulk_insert',
            'meta': table_meta,
            'target_table': table_name,
            'chunk_size': 500,
            'delete_keys': {
                'query': meta['query']
            }
        }

    @staticmethod
    def codelist_table_meta(agency):
        """Build the datastore meta of the codelist table of an agency."""
        return {
            'write_policy': 'upsert',
            'meta': [('code_id', 'VARCHAR(250)'), ('id', 'VARCHAR(250)'),
                     ('code_name', 'TEXT'), ('name', 'TEXT'),
                     ('ref', 'VARCHAR(32)')],
            'target_table':
            f'{SDMXCollectorService.clean(agency).lower()}_codelist',
            'upsert_key': 'ref'
        }

    @staticmethod
    def checksum(data):
        """Return the MD5 hex digest of the concatenated ``str`` of each record."""
        return hashlib.md5(''.join([str(r) for r in data
                                    ]).encode('utf-8')).hexdigest()

    def get_status(self, provider, dataflow, checksum):
        """Compare ``checksum`` with the stored one.

        Returns CREATED when no checksum is stored for the dataflow,
        UNCHANGED when it matches, UPDATED otherwise.
        """
        old = self.database['dataset'].find_one({
            'agency': provider,
            'resource': dataflow
        })
        if not old or 'checksum' not in old:
            return 'CREATED'
        if old['checksum'] == checksum:
            return 'UNCHANGED'
        return 'UPDATED'

    @staticmethod
    def dataflow_to_entity(df):
        """Format a dataflow description as a referential entity."""
        return {
            'id': df['id'],
            'common_name': df['name'],
            'provider': df['structure']['agency_id'],
            'type': 'dataflow',
            'informations': df
        }

    def get_dataset(self, root_url, agency, resource, version, kind, keys):
        """Download a dataflow and build its input message.

        The message holds the data table, the codelist table, the
        referential entities and a checksum of the data records.
        """
        self.sdmx.initialize(root_url, agency, resource, version, kind, keys)
        meta = {
            'name': self.sdmx.name(),
            'codelist': self.sdmx.codelist(),
            'dimensions': self.sdmx.dimensions(),
            'attributes': self.sdmx.attributes(),
            'primary_measure': self.sdmx.primary_measure(),
            'time_dimension': self.sdmx.time_dimension(),
            'query': self.sdmx.query()
        }
        table_meta = SDMXCollectorService.to_table_meta(meta, agency, resource)

        def handle_number(m, v):
            # Numeric columns: convert to float; missing, unparsable or NaN
            # values become None. Other columns are passed through as-is.
            if m.lower() in ('float', 'double'):
                try:
                    d = float(v)
                except (TypeError, ValueError, OverflowError):
                    return None
                if math.isnan(d):
                    return None
                return d
            return v

        data = [{
            k[0]: handle_number(
                k[1],
                r.get(k[0], None) if k[0] != 'query' else meta['query'])
            for k in table_meta['meta']
        } for r in self.sdmx.data()]

        codelist_meta = SDMXCollectorService.codelist_table_meta(agency)

        def hash_row(row):
            h = hashlib.md5(str(row).encode('utf-8'))
            return h.hexdigest()

        codelist_columns = [m[0] for m in codelist_meta['meta']]
        codelist = [
            # 'ref' is the hash of the first two fields of the row.
            dict(zip(codelist_columns, r + (hash_row(r[0:2]), )))
            for r in meta['codelist']
        ]
        checksum = SDMXCollectorService.checksum(data)
        return {
            'referential': {
                'entities': [{
                    'id': table_meta['target_table'],
                    'common_name': meta['name'],
                    'provider': 'internal',
                    'type': 'dataset',
                    'informations': {
                        'id': table_meta['target_table'],
                        'name': meta['name'],
                        'table': table_meta['target_table']
                    }
                }, *[
                    SDMXCollectorService.dataflow_to_entity(d)
                    for d in self.sdmx.agency_dataflows
                ]]
            },
            'datastore': [{
                **table_meta, 'records': data
            }, {
                **codelist_meta, 'records': codelist
            }],
            'checksum': checksum,
            'id': table_meta['target_table'],
            'status': self.get_status(agency, resource, checksum),
            'meta': {
                'type': SDMXCollectorService.clean(agency).lower(),
                'source': 'sdmx'
            }
        }

    def update_checksum(self, id_, checksum):
        """Store ``checksum`` on the dataset identified by ``id_``."""
        self.database['dataset'].update_one({'id': id_},
                                            {'$set': {
                                                'checksum': checksum
                                            }})

    @timer(interval=24 * 60 * 60)
    @rpc
    def publish(self):
        """Download and publish every registered dataflow.

        A dataflow that cannot be handled is logged and skipped so that the
        remaining ones are still published.
        """
        for f in self.get_dataflows():
            agency = f['agency']
            resource = f['resource']
            root_url = f['root_url']
            version = f['version']
            kind = f['kind']
            keys = f['keys']
            _log.info(
                f'Downloading dataset {resource} provided by {agency} ...')
            try:
                dataset = self.get_dataset(root_url, agency, resource,
                                           version, kind, keys)
            except Exception as e:
                _log.error(
                    f'Can not handle dataset {resource} provided by {agency}: {str(e)}'
                )
                continue
            _log.info('Publishing ...')
            self.pub_input(bson.json_util.dumps(dataset))

    @event_handler('loader',
                   'input_loaded',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def ack(self, payload):
        """Store the checksum of a loaded sdmx input and notify.

        Messages without meta, without checksum, or from another source are
        ignored.
        """
        msg = bson.json_util.loads(payload)
        meta = msg.get('meta', None)
        if not meta:
            return
        checksum = msg.get('checksum', None)
        if not checksum:
            return
        if 'source' not in meta or meta['source'] != 'sdmx':
            return
        t = meta['type']

        _log.info(f'Acknowledging {t} file: {msg["id"]}')
        self.update_checksum(msg['id'], checksum)

        _log.info(f'Publishing notification for {msg["id"]}')
        self.pub_notif(
            bson.json_util.dumps({
                'id': msg['id'],
                'source': 'sdmx',
                'type': t,
                'content': msg["id"]
            }))

    @event_handler('api_service',
                   'input_config',
                   handler_type=BROADCAST,
                   reliable_delivery=False)
    def handle_input_config(self, payload):
        """Register the configured dataflow, notify, then publish all dataflows.

        Messages from another source are ignored silently; messages with a
        missing config or a missing mandatory field are logged and ignored.
        """
        msg = bson.json_util.loads(payload)
        if 'meta' not in msg or 'source' not in msg[
                'meta'] or msg['meta']['source'] != 'sdmx':
            return

        _log.info('Received a related input config ...')
        if 'config' not in msg:
            _log.warning('No config within the message. Ignoring ...')
            return
        config = msg['config']

        mandatory = ('agency', 'resource', 'version', 'kind', 'keys',
                     'root_url')
        if any(field not in config for field in mandatory):
            _log.error(
                'Missing at least one of these mandatory fields: root_url, agency, resource, version, kind or keys'
            )
            return

        id_ = self.add_dataflow(config['root_url'], config['agency'],
                                config['resource'], config['version'],
                                config['kind'], config['keys'])
        self.pub_notif(
            bson.json_util.dumps({
                'id': id_,
                'source': msg['meta']['source'],
                'type': '',
                'content': 'A new SDMX feed has been added.'
            }))
        self.publish()
class FakeCollectorService(object):
    """Collector emitting randomly generated inputs."""
    name = 'fake_collector'

    error = ErrorHandler()
    pub_input = Publisher(
        exchange=Exchange(
            name='all_inputs',
            type='topic',
            durable=True,
            auto_delete=True,
            delivery_mode=PERSISTENT,
        )
    )
    pub_notif = Publisher(
        exchange=Exchange(
            name='all_notifications',
            type='topic',
            durable=True,
            auto_delete=True,
            delivery_mode=PERSISTENT,
        )
    )

    @rpc
    def publish(self):
        """Build a random fake input, publish it and return it."""
        status = random.choice(['CREATED', 'UPDATED', 'DELETED', 'UNCHANGED'])
        checksum = str(uuid.uuid4())
        id_ = str(uuid.uuid4())

        fake_event = {
            'id': id_,
            'date': datetime.datetime.utcnow(),
            'common_name': f'fake_event_{id_}',
            'provider': 'fake',
            'type': 'fake',
            'content': {},
            'entities': []
        }
        fake_record = {'id': id_, 'value': random.uniform(0, 1)}
        fake_table = {
            'write_policy': 'insert',
            'meta': [('ID', 'VARCHAR(36)'), ('VALUE', 'FLOAT')],
            'target_table': 'fake',
            'records': [fake_record]
        }

        # The input gets its own id, distinct from the referential event id.
        event = {
            'id': str(uuid.uuid4()),
            'status': status,
            'checksum': checksum,
            'referential': {'events': [fake_event]},
            'datastore': [fake_table],
            'meta': {
                'source': 'fake'
            }
        }
        _log.info(f'Publishing event {event}')
        self.pub_input(dumps(event))
        return event

    @event_handler('loader', 'input_loaded')
    def ack(self, payload):
        """Publish a notification for loaded inputs whose source is ``fake``."""
        _log.info(payload)
        msg = loads(payload)
        if 'meta' not in msg:
            return
        meta = msg['meta']
        is_fake = 'source' in meta and meta['source'] == 'fake'
        if not is_fake:
            return

        _log.info('Publishing notification ...')
        notification = {
            'id': msg['id'],
            'source': self.name,
            'type': 'fake',
            'content': 'nice fake message'
        }
        self.pub_notif(dumps(notification))