# Variant of AuctionsDataBridge that delegates planning decisions to the
# IAuctionsManager mapper.
class AuctionsDataBridge(object):
    """Auctions Data Bridge"""

    def __init__(self, config, re_planning=False, debug=False):
        super(AuctionsDataBridge, self).__init__()
        self.config = config
        self.tenders_ids_list = []
        self.tz = tzlocal()
        self.debug = debug
        self.mapper = components.qA(self, IAuctionsManager)
        self.re_planning = re_planning
        DEFAULT_RETRIEVERS_PARAMS.update(
            self.config.get('main').get('retrievers_params', {}))
        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('auctions_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        sync_design(self.db)
        self.feeder = ResourceFeeder(
            host=self.config_get('resource_api_server'),
            resource=self.config_get('resource_name'),
            version=self.config_get('resource_api_version'),
            key='',
            extra_params=API_EXTRA,
            retrievers_params=DEFAULT_RETRIEVERS_PARAMS
        )

    def config_get(self, name):
        return self.config['main'][name]

    def run(self):
        if self.re_planning:
            self.run_re_planning()
            return
        LOGGER.info('Start Auctions Bridge',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_START_BRIDGE})
        LOGGER.info('Start data sync...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_DATA_SYNC})
        for item in self.feeder.get_resource_items():
            # magic goes here
            feed = FeedItem(item)
            planning = self.mapper(feed)
            if not planning:
                continue
            for cmd, item_id, lot_id in planning:
                if lot_id:
                    LOGGER.info('Lot {} of tender {} selected for {}'.format(
                        lot_id, item_id, cmd))
                else:
                    LOGGER.info(
                        'Tender {} selected for {}'.format(item_id, cmd))
                planning(cmd, item_id, lot_id=lot_id)

    def run_re_planning(self):
        pass
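# Usage sketch (illustrative, not part of the module): the key names below are
# exactly those the constructor above reads via config_get; every value is an
# assumed example.
#
#     config = {
#         'main': {
#             'couch_url': 'http://127.0.0.1:5984',
#             'auctions_db': 'auctions',
#             'resource_api_server': 'https://lb.api-sandbox.openprocurement.org',
#             'resource_name': 'tenders',
#             'resource_api_version': '2.3',
#             'retrievers_params': {},  # optional DEFAULT_RETRIEVERS_PARAMS overrides
#         }
#     }
#     AuctionsDataBridge(config).run()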
# EdgeDataBridge variant backed by Elasticsearch storage.
class EdgeDataBridge(object):
    """Edge Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.workers_config = {}
        self.log_dict = {}
        self.bridge_id = uuid.uuid4().hex
        self.api_host = self.config_get('resources_api_server')
        self.api_version = self.config_get('resources_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        # Check up_wait_sleep
        up_wait_sleep = self.retrievers_params.get('up_wait_sleep')
        if up_wait_sleep is not None and up_wait_sleep < 30:
            raise DataBridgeConfigError('Invalid \'up_wait_sleep\' in '
                                        '\'retrievers_params\'. Value must be '
                                        'greater than or equal to 30.')

        # Workers settings
        for key in WORKER_CONFIG:
            self.workers_config[key] = (self.config_get(key) or
                                        WORKER_CONFIG[key])

        # Init config
        for key in DEFAULTS:
            setattr(self, key, self.config_get(key) or DEFAULTS[key])

        # Pools
        self.workers_pool = gevent.pool.Pool(self.workers_max)
        self.retry_workers_pool = gevent.pool.Pool(self.retry_workers_max)
        self.filter_workers_pool = gevent.pool.Pool(self.filter_workers_count)

        # Queues
        if self.input_queue_size == -1:
            self.input_queue = Queue()
        else:
            self.input_queue = Queue(self.input_queue_size)
        if self.resource_items_queue_size == -1:
            self.resource_items_queue = Queue()
        else:
            self.resource_items_queue = Queue(self.resource_items_queue_size)
        self.api_clients_queue = Queue()
        # self.retry_api_clients_queue = Queue()
        if self.retry_resource_items_queue_size == -1:
            self.retry_resource_items_queue = Queue()
        else:
            self.retry_resource_items_queue = Queue(
                self.retry_resource_items_queue_size)

        self.process = psutil.Process(os.getpid())

        # Default values for statistic variables
        for key in ('not_actual_docs_count', 'update_documents', 'droped',
                    'add_to_resource_items_queue', 'save_documents', 'skiped',
                    'add_to_retry', 'exceptions_count', 'not_found_count',
                    'timeshift', 'request_dev'):
            self.log_dict[key] = 0

        if self.api_host != '' and self.api_host is not None:
            api_host = urlparse(self.api_host)
            if api_host.scheme == '' and api_host.netloc == '':
                raise DataBridgeConfigError(
                    'Invalid \'tenders_api_server\' url.')
        else:
            raise DataBridgeConfigError('In config dictionary empty or missing'
                                        ' \'tenders_api_server\'')

        # self.db = prepare_couchdb(self.couch_url, self.db_name, logger)
        # db_url = self.couch_url + '/' + self.db_name
        # prepare_couchdb_views(db_url, self.workers_config['resource'], logger)
        self.db = Elasticsearch()
        self.db.indices.create(index=self.db_name, ignore=400)
        settings = self.db.indices.get_settings(
            index=self.db_name, name='index.mapping.total_fields.limit')
        if settings.get(self.db_name, {}).get(u'settings', {}).get(
                u'index', {}).get(u'mapping', {}).get(
                u'total_fields', {}).get(u'limit', u'1000') != u'4000':
            self.db.indices.put_settings(
                body={'index.mapping.total_fields.limit': 4000},
                index=self.db_name)
        self.db.index_get = partial(self.db.get, index=self.db_name)
        self.db.index_bulk = partial(self.db.bulk, index=self.db_name)

        collector_config = {
            'main': {
                'storage': 'couchdb',
                'couch_url': self.couch_url,
                'log_db': self.log_db_name
            }
        }
        # self.server = Server(self.couch_url,
        #                      session=Session(retry_delays=range(10)))
        # self.logger = LogsCollector(collector_config)

        self.view_path = '_design/{}/_view/by_dateModified'.format(
            self.workers_config['resource'])
        extra_params = {
            'mode': self.retrieve_mode,
            'limit': self.resource_items_limit
        }
        self.feeder = ResourceFeeder(host=self.api_host,
                                     version=self.api_version, key='',
                                     resource=self.workers_config['resource'],
                                     extra_params=extra_params,
                                     retrievers_params=self.retrievers_params,
                                     adaptive=True)
        self.api_clients_info = {}
        # self.retry_api_clients_info = {}

    def config_get(self, name):
        try:
            return self.config.get('main').get(name)
        except AttributeError:
            raise DataBridgeConfigError('In config dictionary missing section'
                                        ' \'main\'')

    def create_api_client(self):
        client_user_agent = self.user_agent + '/' + self.bridge_id
        timeout = 0.1
        while 1:
            try:
                api_client = APIClient(
                    host_url=self.api_host, user_agent=client_user_agent,
                    api_version=self.api_version, key='',
                    resource=self.workers_config['resource'])
                client_id = uuid.uuid4().hex
                logger.info('Started api_client {}'.format(
                    api_client.session.headers['User-Agent']),
                    extra={'MESSAGE_ID': 'create_api_clients',
                           'type': 'counter'})
                api_client_dict = {
                    'id': client_id,
                    'client': api_client,
                    'request_interval': 0,
                    'not_actual_count': 0
                }
                self.api_clients_info[api_client_dict['id']] = {
                    'destroy': False,
                    'request_durations': {},
                    'request_interval': 0,
                    'avg_duration': 0
                }
                self.api_clients_queue.put(api_client_dict)
                break
            except RequestFailed as e:
                self.log_dict['exceptions_count'] += 1
                logger.error(
                    'Failed to start api_client with status code {}'.format(
                        e.status_code),
                    extra={'MESSAGE_ID': 'exceptions', 'type': 'counter'})
                timeout = timeout * 2
                logger.info(
                    'create_api_client will sleep {} sec.'.format(timeout))
                sleep(timeout)
            except Exception as e:
                self.log_dict['exceptions_count'] += 1
                logger.error(
                    'Failed to start api client with error: {}'.format(
                        e.message),
                    extra={'MESSAGE_ID': 'exceptions', 'type': 'counter'})
                timeout = timeout * 2
                logger.info(
                    'create_api_client will sleep {} sec.'.format(timeout))
                sleep(timeout)

    def fill_api_clients_queue(self):
        while self.api_clients_queue.qsize() < self.workers_min:
            self.create_api_client()

    def fill_input_queue(self):
        for resource_item in self.feeder.get_resource_items():
            self.input_queue.put(resource_item)
            logger.debug('Add to temp queue from sync: {} {} {}'.format(
                self.workers_config['resource'][:-1], resource_item['id'],
                resource_item['dateModified']),
                extra={'MESSAGE_ID': 'received_from_sync', 'type': 'counter'})

    def send_bulk(self, input_dict):
        sleep_before_retry = 2
        for i in xrange(0, 3):
            try:
                # rows = self.db.view(self.view_path, keys=input_dict.values())
                # resp_dict = {k.id: k.key for k in rows}
                rows = self.db.mget(index=self.db_name,
                                    doc_type=self.workers_config['resource'],
                                    body={"ids": input_dict.keys()},
                                    _source_include="dateModified")
                resp_dict = {k['_id']: (k['_source']["dateModified"]
                                        if '_source' in k else k['found'])
                             for k in rows['docs']}
                break
            except (IncompleteRead, Exception) as e:
                logger.error('Error while send bulk {}'.format(e.message))
                if i == 2:
                    raise e
                sleep(sleep_before_retry)
                sleep_before_retry *= 2
        for item_id, date_modified in input_dict.items():
            if item_id in resp_dict and date_modified == resp_dict[item_id]:
                self.log_dict['skiped'] += 1
                logger.debug('Ignored {} {}: SYNC - {}, EDGE - {}'.format(
                    self.workers_config['resource'][:-1], item_id,
                    date_modified, resp_dict[item_id]),
                    extra={'MESSAGE_ID': 'skiped', 'type': 'counter'})
            else:
                self.resource_items_queue.put({
                    'id': item_id,
                    'dateModified': date_modified
                })
                logger.debug('Put to main queue {}: {} {}'.format(
                    self.workers_config['resource'][:-1], item_id,
                    date_modified),
                    extra={'MESSAGE_ID': 'add_to_resource_items_queue',
                           'type': 'counter'})
                self.log_dict['add_to_resource_items_queue'] += 1

    def fill_resource_items_queue(self):
        start_time = datetime.now()
        input_dict = {}
        while True:
            # Get resource_item from temp queue
            if not self.input_queue.empty():
                resource_item = self.input_queue.get()
            else:
                timeout = self.bulk_query_interval -\
                    (datetime.now() - start_time).total_seconds()
                if timeout > self.bulk_query_interval:
                    timeout = self.bulk_query_interval
                try:
                    resource_item = self.input_queue.get(timeout=timeout)
                except Empty:
                    resource_item = None
            # Add resource_item to bulk
            if resource_item is not None:
                input_dict[resource_item['id']] = resource_item['dateModified']
            if (len(input_dict) >= self.bulk_query_limit or
                    (datetime.now() - start_time).total_seconds() >=
                    self.bulk_query_interval):
                if len(input_dict) > 0:
                    self.send_bulk(input_dict)
                    input_dict = {}
                start_time = datetime.now()

    def resource_items_filter(self, r_id, r_date_modified):
        try:
            local_document = self.db.get(r_id)
            if local_document:
                if local_document['dateModified'] < r_date_modified:
                    return True
                else:
                    return False
            else:
                return True
        except Exception as e:
            logger.error(
                'Filter error: Error while getting {} {} from couchdb: '
                '{}'.format(self.workers_config['resource'][:-1], r_id,
                            e.message),
                extra={'MESSAGE_ID': 'exceptions', 'type': 'counter'})
            return True

    def reset_log_counters(self):
        st_dev = self.log_dict['request_dev']
        for key in self.log_dict.keys():
            self.log_dict[key] = 0
        self.log_dict['request_dev'] = st_dev

    def _get_average_requests_duration(self):
        req_durations = []
        delta = timedelta(seconds=self.perfomance_window)
        current_date = datetime.now() - delta
        for cid, info in self.api_clients_info.items():
            if len(info['request_durations']) > 0:
                if min(info['request_durations'].keys()) <= current_date:
                    info['grown'] = True
                avg = round(sum(info['request_durations'].values()) * 1.0 /
                            len(info['request_durations']), 3)
                req_durations.append(avg)
                info['avg_duration'] = avg
        if len(req_durations) > 0:
            return round(sum(req_durations) / len(req_durations), 3),\
                req_durations
        else:
            return 0, req_durations

    def bridge_stats(self):
        sync_forward_last_response =\
            (datetime.now() - self.feeder.forward_info.get(
                'last_response', datetime.now())).total_seconds()
        if self.feeder.backward_info.get('status') == FINISHED:
            sync_backward_last_response = 0
        else:
            sync_backward_last_response =\
                (datetime.now() - self.feeder.backward_info.get(
                    'last_response', datetime.now())).total_seconds()
        stats_dict = {k: v for k, v in self.log_dict.items()}
        stats_dict['avg_request_duration'], avg_list =\
            self._get_average_requests_duration()
        stats_dict['avg_request_duration'] =\
            stats_dict['avg_request_duration'] * 1000
        if len(avg_list) > 0:
            stats_dict['min_avg_request_duration'] =\
                round(min(avg_list), 3) * 1000
            stats_dict['max_avg_request_duration'] =\
                round(max(avg_list), 3) * 1000
        else:
            stats_dict['min_avg_request_duration'] = 0
            stats_dict['max_avg_request_duration'] = 0
        stats_dict['_id'] = self.workers_config['resource']
        stats_dict['resource'] = self.workers_config['resource']
        stats_dict['time'] = datetime.now().isoformat()
        stats_dict['resource_items_queue_size'] =\
            self.resource_items_queue.qsize()
        stats_dict['retry_resource_items_queue_size'] =\
            self.retry_resource_items_queue.qsize()
        stats_dict['workers_count'] =\
            self.workers_max - self.workers_pool.free_count()
        if self.filler.exception:
            stats_dict['filter_workers_count'] = 0
        else:
            stats_dict['filter_workers_count'] = 1
        stats_dict['retry_workers_count'] =\
            self.retry_workers_max - self.retry_workers_pool.free_count()
        stats_dict['api_clients_count'] = len(self.api_clients_info)
        stats_dict['rss'] = self.process.memory_info().rss / 1024 / 1024
        stats_dict['vms'] = self.process.memory_info().vms / 1024 / 1024
        stats_dict['sync_queue'] = self.feeder.queue.qsize()
        stats_dict['sync_forward_response_len'] =\
            self.feeder.forward_info.get('resource_item_count', 0)
        stats_dict['sync_backward_response_len'] =\
            self.feeder.backward_info.get('resource_item_count', 0)
        stats_dict['sync_forward_last_response'] = sync_forward_last_response
        stats_dict['sync_backward_last_response'] =\
            sync_backward_last_response
        return stats_dict

    def queues_controller(self):
        while True:
            if (self.workers_pool.free_count() > 0 and
                    (self.resource_items_queue.qsize() >
                     ((float(self.resource_items_queue_size) / 100) *
                      self.workers_inc_threshold))):
                self.create_api_client()
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.log_dict,
                                             self.api_clients_info)
                self.workers_pool.add(w)
                logger.info('Queue controller: Create main queue worker.')
            elif (self.resource_items_queue.qsize() <
                    ((float(self.resource_items_queue_size) / 100) *
                     self.workers_dec_threshold)):
                if len(self.workers_pool) > self.workers_min:
                    wi = self.workers_pool.greenlets.pop()
                    wi.shutdown()
                    api_client_dict = self.api_clients_queue.get()
                    del self.api_clients_info[api_client_dict['id']]
                    logger.info('Queue controller: Kill main queue worker.')
            filled_resource_items_queue = round(
                self.resource_items_queue.qsize() /
                (float(self.resource_items_queue_size) / 100), 2)
            logger.info('Resource items queue filled on {} %'.format(
                filled_resource_items_queue))
            filled_retry_resource_items_queue = round(
                self.retry_resource_items_queue.qsize() /
                (float(self.retry_resource_items_queue_size) / 100), 2)
            logger.info('Retry resource items queue filled on {} %'.format(
                filled_retry_resource_items_queue))
            sleep(self.queues_controller_timeout)

    def gevent_watcher(self):
        self.perfomance_watcher()
        # for t in self.server.tasks():
        #     if (t['type'] == 'indexer' and t['database'] == self.db_name and
        #             t.get('design_document', None) == '_design/{}'.format(
        #                 self.workers_config['resource'])):
        #         logger.info('Watcher: Waiting for end of view indexing.'
        #                     ' Current progress: {} %'.format(t['progress']))
        # spawn(self.logger.save, self.bridge_stats())
        self.reset_log_counters()
        # for i in xrange(0, self.filter_workers_pool.free_count()):
        #     self.filter_workers_pool.spawn(self.fill_resource_items_queue)
        #     logger.info('Watcher: Create fill queue worker.')

        # Check fill threads
        if self.input_queue_filler.exception:
            logger.error('Temp queue filler error: {}'.format(
                self.input_queue_filler.exception.message),
                extra={'MESSAGE_ID': 'exception', 'type': 'counter'})
            self.input_queue_filler = spawn(self.fill_input_queue)
        if self.filler.exception:
            logger.error('Fill thread error: {}'.format(
                self.filler.exception.message),
                extra={'MESSAGE_ID': 'exception', 'type': 'counter'})
            self.filler = spawn(self.fill_resource_items_queue)

        if len(self.workers_pool) < self.workers_min:
            for i in xrange(0, (self.workers_min - len(self.workers_pool))):
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.log_dict,
                                             self.api_clients_info)
                self.workers_pool.add(w)
                logger.info('Watcher: Create main queue worker.')
                self.create_api_client()
        if len(self.retry_workers_pool) < self.retry_workers_min:
            for i in xrange(0, self.retry_workers_min -
                            len(self.retry_workers_pool)):
                self.create_api_client()
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.retry_resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.log_dict,
                                             self.api_clients_info)
                self.retry_workers_pool.add(w)
                logger.info('Watcher: Create retry queue worker.')

    def _calculate_st_dev(self, values):
        if len(values) > 0:
            avg = sum(values) * 1.0 / len(values)
            variance = map(lambda x: (x - avg) ** 2, values)
            avg_variance = sum(variance) * 1.0 / len(variance)
            st_dev = math.sqrt(avg_variance)
            return round(st_dev, 3)
        else:
            return 0

    def _mark_bad_clients(self, dev):
        # Mark bad api clients
        for cid, info in self.api_clients_info.items():
            if info.get('grown', False) and info['avg_duration'] > dev:
                info['destroy'] = True
                self.create_api_client()
                logger.debug('Perfomance watcher: Mark client {} as bad, avg.'
                             ' request_duration is {} sec.'.format(
                                 cid, info['avg_duration']))
            elif info['avg_duration'] < dev and info['request_interval'] > 0:
                self.create_api_client()
                info['destroy'] = True
                logger.debug('Perfomance watcher: Mark client {} as bad,'
                             ' request_interval is {} sec.'.format(
                                 cid, info['request_interval']))

    def perfomance_watcher(self):
        avg_duration, values = self._get_average_requests_duration()
        for _, info in self.api_clients_info.items():
            delta = timedelta(
                seconds=self.perfomance_window + self.watch_interval)
            current_date = datetime.now() - delta
            delete_list = []
            for key in info['request_durations']:
                if key < current_date:
                    delete_list.append(key)
            for k in delete_list:
                del info['request_durations'][k]
            delete_list = []
        st_dev = self._calculate_st_dev(values)
        dev = round(st_dev + avg_duration, 3)
        logger.info('Perfomance watcher: Standard deviation for '
                    'request_duration is {} sec.'.format(round(st_dev, 3)))
        self.log_dict['request_dev'] = dev * 1000
        self._mark_bad_clients(dev)
        clear_api_client_queue(self.api_clients_queue, self.api_clients_info)

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        self.input_queue_filler = spawn(self.fill_input_queue)
        self.filler = spawn(self.fill_resource_items_queue)
        spawn(self.queues_controller)
        while True:
            self.gevent_watcher()
            sleep(self.watch_interval)
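# Usage sketch for the index-scoped helpers bound in __init__ above: partial()
# pre-fills index=self.db_name, so callers pass only the remaining arguments.
# The id below is an assumed example value, and `bridge` stands in for an
# EdgeDataBridge instance.
#
#     doc = bridge.db.index_get(doc_type=bridge.workers_config['resource'],
#                               id='823d50b3236247adad28a5a66f74db42')
#     date_modified = doc['_source']['dateModified']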
# Earlier variant of AuctionsDataBridge: selects tenders and lots for planning
# directly from the sync feed and shells out to the auction worker.
class AuctionsDataBridge(object):
    """Auctions Data Bridge"""

    def __init__(self, config):
        super(AuctionsDataBridge, self).__init__()
        self.config = config
        self.tenders_ids_list = []
        self.tz = tzlocal()
        DEFAULT_RETRIEVERS_PARAMS.update(
            self.config.get('main').get('retrievers_params', {}))
        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('auctions_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        sync_design(self.db)
        self.feeder = ResourceFeeder(
            host=self.config_get('tenders_api_server'), resource='tenders',
            version=self.config_get('tenders_api_version'), key='',
            extra_params={'opt_fields': 'status,auctionPeriod,lots',
                          'mode': '_all_'},
            retrievers_params=DEFAULT_RETRIEVERS_PARAMS
        )

    def config_get(self, name):
        return self.config.get('main').get(name)

    def get_tenders_list(self, re_planning=False):
        for item in self.feeder.get_resource_items():
            if item['status'] == "active.auction":
                if 'lots' not in item and 'auctionPeriod' in item \
                        and 'startDate' in item['auctionPeriod'] \
                        and 'endDate' not in item['auctionPeriod']:
                    start_date = iso8601.parse_date(
                        item['auctionPeriod']['startDate'])
                    start_date = start_date.astimezone(self.tz)
                    auctions_start_in_date = startDate_view(
                        self.db,
                        key=(mktime(start_date.timetuple()) +
                             start_date.microsecond / 1E6) * 1000
                    )
                    if datetime.now(self.tz) > start_date:
                        logger.info(
                            'Tender {} start date in past.'
                            ' Skip it for planning'.format(item['id']),
                            extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_TENDER_SKIP})
                        continue
                    if re_planning and item['id'] in self.tenders_ids_list:
                        logger.info(
                            'Tender {} already planned while'
                            ' replanning'.format(item['id']),
                            extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING_TENDER_ALREADY_PLANNED})
                        continue
                    elif not re_planning and [
                            row.id for row in auctions_start_in_date.rows
                            if row.id == item['id']]:
                        logger.info(
                            'Tender {} already planned on same'
                            ' date'.format(item['id']),
                            extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_TENDER_ALREADY_PLANNED})
                        continue
                    yield (str(item['id']), )
                elif 'lots' in item:
                    for lot in item['lots']:
                        if lot["status"] == "active" \
                                and 'auctionPeriod' in lot \
                                and 'startDate' in lot['auctionPeriod'] \
                                and 'endDate' not in lot['auctionPeriod']:
                            start_date = iso8601.parse_date(
                                lot['auctionPeriod']['startDate'])
                            start_date = start_date.astimezone(self.tz)
                            auctions_start_in_date = startDate_view(
                                self.db,
                                key=(mktime(start_date.timetuple()) +
                                     start_date.microsecond / 1E6) * 1000
                            )
                            if datetime.now(self.tz) > start_date:
                                logger.info(
                                    'Start date for lot {} in tender {} is in'
                                    ' past. Skip it for planning'.format(
                                        lot['id'], item['id']),
                                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_LOT_SKIP}
                                )
                                continue
                            auction_id = MULTILOT_AUCTION_ID.format(item, lot)
                            if re_planning and \
                                    auction_id in self.tenders_ids_list:
                                logger.info(
                                    'Tender {} already planned while'
                                    ' replanning'.format(auction_id),
                                    extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING_LOT_ALREADY_PLANNED})
                                continue
                            elif not re_planning and [
                                    row.id
                                    for row in auctions_start_in_date.rows
                                    if row.id == auction_id]:
                                logger.info(
                                    'Tender {} already planned on same'
                                    ' date'.format(auction_id),
                                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_LOT_ALREADY_PLANNED})
                                continue
                            yield (str(item["id"]), str(lot["id"]), )
            if item['status'] == "active.qualification" and 'lots' in item:
                for lot in item['lots']:
                    if lot["status"] == "active":
                        is_pre_announce = PreAnnounce_view(self.db)
                        auction_id = MULTILOT_AUCTION_ID.format(item, lot)
                        if [row.id for row in is_pre_announce.rows
                                if row.id == auction_id]:
                            self.start_auction_worker_cmd(
                                'announce', item['id'], lot_id=lot['id'])
            if item['status'] == "cancelled":
                future_auctions = endDate_view(
                    self.db, startkey=time() * 1000
                )
                if 'lots' in item:
                    for lot in item['lots']:
                        auction_id = MULTILOT_AUCTION_ID.format(item, lot)
                        if auction_id in [i.id for i in future_auctions]:
                            logger.info('Tender {0} selected for'
                                        ' cancellation'.format(item['id']))
                            self.start_auction_worker_cmd(
                                'cancel', item['id'], lot_id=lot['id'])
                else:
                    if item["id"] in [i.id for i in future_auctions]:
                        logger.info('Tender {0} selected for'
                                    ' cancellation'.format(item['id']))
                        self.start_auction_worker_cmd('cancel', item["id"])

    def start_auction_worker_cmd(self, cmd, tender_id, with_api_version=None,
                                 lot_id=None):
        params = [self.config_get('auction_worker'), cmd, tender_id,
                  self.config_get('auction_worker_config')]
        if lot_id:
            params += ['--lot', lot_id]
            logger.debug(
                'Start auction worker with param `--lot`',
                extra={'MESSAGE_ID': 'BRIDGE_START_WORKER_WITH_LOT_PARAM'})
        if with_api_version:
            params += ['--with_api_version', with_api_version]
            logger.debug(
                'Start auction worker with param `--with_api_version`',
                extra={'MESSAGE_ID': 'BRIDGE_START_WORKER_WITH_API_V_PARAM'})
        message_id = 'BRIDGE_START_WORKER_WITH_{}_CMD'.format(cmd)
        logger.debug('Start auction worker with cmd: {}'.format(cmd),
                     extra={'MESSAGE_ID': message_id})
        result = do_until_success(
            check_call,
            args=(params,),
        )
        logger.info('Auction command {} result: {}'.format(params[1], result))

    def run(self):
        logger.info('Start Auctions Bridge',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_START_BRIDGE})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_DATA_SYNC})
        for planning_data in self.get_tenders_list():
            if len(planning_data) == 1:
                logger.info('Tender {0} selected for planning'.format(
                    *planning_data))
                self.start_auction_worker_cmd('planning', planning_data[0])
            elif len(planning_data) == 2:
                logger.info('Lot {1} of tender {0} selected for'
                            ' planning'.format(*planning_data))
                self.start_auction_worker_cmd('planning', planning_data[0],
                                              lot_id=planning_data[1])

    def run_re_planning(self):
        pass
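# Illustration (hypothetical paths and ids): for a lot-level planning call,
# start_auction_worker_cmd above builds a params list like the following and
# hands it to do_until_success(check_call, args=(params,)):
#
#     params = ['/opt/auction/bin/auction_worker',    # 'auction_worker' setting
#               'planning',                           # cmd
#               'f3849ade33534174b8402579152a5f41',   # tender_id
#               '/opt/auction/etc/worker.yaml',       # 'auction_worker_config'
#               '--lot', '563ef5d999f34d36a5a0e4e4d91d7be1']  # only when lot_id is set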
# EdgeDataBridge variant backed by CouchDB storage.
class EdgeDataBridge(object):
    """Edge Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.workers_config = {}
        self.bridge_id = uuid.uuid4().hex
        self.api_host = self.config_get('resources_api_server')
        self.api_version = self.config_get('resources_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        # Check up_wait_sleep
        up_wait_sleep = self.retrievers_params.get('up_wait_sleep')
        if up_wait_sleep is not None and up_wait_sleep < 30:
            raise DataBridgeConfigError('Invalid \'up_wait_sleep\' in '
                                        '\'retrievers_params\'. Value must be '
                                        'greater than or equal to 30.')

        # Workers settings
        for key in WORKER_CONFIG:
            self.workers_config[key] = (self.config_get(key) or
                                        WORKER_CONFIG[key])

        # Init config
        for key in DEFAULTS:
            setattr(self, key, self.config_get(key) or DEFAULTS[key])

        # Pools
        self.workers_pool = gevent.pool.Pool(self.workers_max)
        self.retry_workers_pool = gevent.pool.Pool(self.retry_workers_max)
        self.filter_workers_pool = gevent.pool.Pool(self.filter_workers_count)

        # Queues
        if self.input_queue_size == -1:
            self.input_queue = Queue()
        else:
            self.input_queue = Queue(self.input_queue_size)
        if self.resource_items_queue_size == -1:
            self.resource_items_queue = Queue()
        else:
            self.resource_items_queue = Queue(self.resource_items_queue_size)
        self.api_clients_queue = Queue()
        # self.retry_api_clients_queue = Queue()
        if self.retry_resource_items_queue_size == -1:
            self.retry_resource_items_queue = Queue()
        else:
            self.retry_resource_items_queue = Queue(
                self.retry_resource_items_queue_size)

        self.process = psutil.Process(os.getpid())

        if self.api_host != '' and self.api_host is not None:
            api_host = urlparse(self.api_host)
            if api_host.scheme == '' and api_host.netloc == '':
                raise DataBridgeConfigError(
                    'Invalid \'tenders_api_server\' url.')
        else:
            raise DataBridgeConfigError('In config dictionary empty or missing'
                                        ' \'tenders_api_server\'')

        self.db = prepare_couchdb(self.couch_url, self.db_name, logger)
        db_url = self.couch_url + '/' + self.db_name
        prepare_couchdb_views(db_url, self.workers_config['resource'], logger)
        self.server = Server(self.couch_url,
                             session=Session(retry_delays=range(10)))
        self.view_path = '_design/{}/_view/by_dateModified'.format(
            self.workers_config['resource'])
        extra_params = {
            'mode': self.retrieve_mode,
            'limit': self.resource_items_limit
        }
        self.feeder = ResourceFeeder(host=self.api_host,
                                     version=self.api_version, key='',
                                     resource=self.workers_config['resource'],
                                     extra_params=extra_params,
                                     retrievers_params=self.retrievers_params,
                                     adaptive=True)
        self.api_clients_info = {}

    def config_get(self, name):
        try:
            return self.config.get('main').get(name)
        except AttributeError:
            raise DataBridgeConfigError('In config dictionary missing section'
                                        ' \'main\'')

    def create_api_client(self):
        client_user_agent = self.user_agent + '/' + self.bridge_id
        timeout = 0.1
        while 1:
            try:
                api_client = APIClient(
                    host_url=self.api_host, user_agent=client_user_agent,
                    api_version=self.api_version, key='',
                    resource=self.workers_config['resource'])
                client_id = uuid.uuid4().hex
                logger.info('Started api_client {}'.format(
                    api_client.session.headers['User-Agent']),
                    extra={'MESSAGE_ID': 'create_api_clients'})
                api_client_dict = {
                    'id': client_id,
                    'client': api_client,
                    'request_interval': 0,
                    'not_actual_count': 0
                }
                self.api_clients_info[api_client_dict['id']] = {
                    'drop_cookies': False,
                    'request_durations': {},
                    'request_interval': 0,
                    'avg_duration': 0
                }
                self.api_clients_queue.put(api_client_dict)
                break
            except RequestFailed as e:
                logger.error(
                    'Failed to start api_client with status code {}'.format(
                        e.status_code),
                    extra={'MESSAGE_ID': 'exceptions'})
                timeout = timeout * 2
                logger.info(
                    'create_api_client will sleep {} sec.'.format(timeout))
                sleep(timeout)
            except Exception as e:
                logger.error(
                    'Failed to start api client with error: {}'.format(
                        e.message),
                    extra={'MESSAGE_ID': 'exceptions'})
                timeout = timeout * 2
                logger.info(
                    'create_api_client will sleep {} sec.'.format(timeout))
                sleep(timeout)

    def fill_api_clients_queue(self):
        while self.api_clients_queue.qsize() < self.workers_min:
            self.create_api_client()

    def fill_input_queue(self):
        for resource_item in self.feeder.get_resource_items():
            self.input_queue.put(resource_item)
            logger.debug('Add to temp queue from sync: {} {} {}'.format(
                self.workers_config['resource'][:-1], resource_item['id'],
                resource_item['dateModified']),
                extra={'MESSAGE_ID': 'received_from_sync'})

    def send_bulk(self, input_dict):
        sleep_before_retry = 2
        for i in xrange(0, 3):
            try:
                rows = self.db.view(self.view_path, keys=input_dict.values())
                resp_dict = {k.id: k.key for k in rows}
                break
            except (IncompleteRead, Exception) as e:
                logger.error('Error while send bulk {}'.format(e.message),
                             extra={'MESSAGE_ID': 'exceptions'})
                if i == 2:
                    raise e
                sleep(sleep_before_retry)
                sleep_before_retry *= 2
        for item_id, date_modified in input_dict.items():
            if item_id in resp_dict and date_modified == resp_dict[item_id]:
                logger.debug('Ignored {} {}: SYNC - {}, EDGE - {}'.format(
                    self.workers_config['resource'][:-1], item_id,
                    date_modified, resp_dict[item_id]),
                    extra={'MESSAGE_ID': 'skiped'})
            else:
                self.resource_items_queue.put({
                    'id': item_id,
                    'dateModified': date_modified
                })
                logger.debug('Put to main queue {}: {} {}'.format(
                    self.workers_config['resource'][:-1], item_id,
                    date_modified),
                    extra={'MESSAGE_ID': 'add_to_resource_items_queue'})

    def fill_resource_items_queue(self):
        start_time = datetime.now()
        input_dict = {}
        while True:
            # Get resource_item from temp queue
            if not self.input_queue.empty():
                resource_item = self.input_queue.get()
            else:
                timeout = self.bulk_query_interval -\
                    (datetime.now() - start_time).total_seconds()
                if timeout > self.bulk_query_interval:
                    timeout = self.bulk_query_interval
                try:
                    resource_item = self.input_queue.get(timeout=timeout)
                except Empty:
                    resource_item = None
            # Add resource_item to bulk
            if resource_item is not None:
                input_dict[resource_item['id']] = resource_item['dateModified']
            if (len(input_dict) >= self.bulk_query_limit or
                    (datetime.now() - start_time).total_seconds() >=
                    self.bulk_query_interval):
                if len(input_dict) > 0:
                    self.send_bulk(input_dict)
                    input_dict = {}
                start_time = datetime.now()

    def resource_items_filter(self, r_id, r_date_modified):
        try:
            local_document = self.db.get(r_id)
            if local_document:
                if local_document['dateModified'] < r_date_modified:
                    return True
                else:
                    return False
            else:
                return True
        except Exception as e:
            logger.error(
                'Filter error: Error while getting {} {} from couchdb: '
                '{}'.format(self.workers_config['resource'][:-1], r_id,
                            e.message),
                extra={'MESSAGE_ID': 'exceptions'})
            return True

    def _get_average_requests_duration(self):
        req_durations = []
        delta = timedelta(seconds=self.perfomance_window)
        current_date = datetime.now() - delta
        for cid, info in self.api_clients_info.items():
            if len(info['request_durations']) > 0:
                if min(info['request_durations'].keys()) <= current_date:
                    info['grown'] = True
                avg = round(sum(info['request_durations'].values()) * 1.0 /
                            len(info['request_durations']), 3)
                req_durations.append(avg)
                info['avg_duration'] = avg
        if len(req_durations) > 0:
            return round(sum(req_durations) / len(req_durations), 3),\
                req_durations
        else:
            return 0, req_durations

    # TODO: Add logic for restart sync if last response is greater than some
    # value and there are no active tasks specific for resource
    def queues_controller(self):
        while True:
            if (self.workers_pool.free_count() > 0 and
                    (self.resource_items_queue.qsize() >
                     ((float(self.resource_items_queue_size) / 100) *
                      self.workers_inc_threshold))):
                self.create_api_client()
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.api_clients_info)
                self.workers_pool.add(w)
                logger.info('Queue controller: Create main queue worker.')
            elif (self.resource_items_queue.qsize() <
                    ((float(self.resource_items_queue_size) / 100) *
                     self.workers_dec_threshold)):
                if len(self.workers_pool) > self.workers_min:
                    wi = self.workers_pool.greenlets.pop()
                    wi.shutdown()
                    api_client_dict = self.api_clients_queue.get()
                    del self.api_clients_info[api_client_dict['id']]
                    logger.info('Queue controller: Kill main queue worker.')
            filled_resource_items_queue = round(
                self.resource_items_queue.qsize() /
                (float(self.resource_items_queue_size) / 100), 2)
            logger.info('Resource items queue filled on {} %'.format(
                filled_resource_items_queue))
            filled_retry_resource_items_queue = round(
                self.retry_resource_items_queue.qsize() /
                (float(self.retry_resource_items_queue_size) / 100), 2)
            logger.info('Retry resource items queue filled on {} %'.format(
                filled_retry_resource_items_queue))
            sleep(self.queues_controller_timeout)

    def gevent_watcher(self):
        self.perfomance_watcher()
        for t in self.server.tasks():
            if (t['type'] == 'indexer' and t['database'] == self.db_name and
                    t.get('design_document', None) == '_design/{}'.format(
                        self.workers_config['resource'])):
                logger.info('Watcher: Waiting for end of view indexing.'
                            ' Current progress: {} %'.format(t['progress']))

        # Check fill threads
        input_threads = 1
        if self.input_queue_filler.exception:
            input_threads = 0
            logger.error('Temp queue filler error: {}'.format(
                self.input_queue_filler.exception.message),
                extra={'MESSAGE_ID': 'exception'})
            self.input_queue_filler = spawn(self.fill_input_queue)
        logger.info('Input threads {}'.format(input_threads),
                    extra={'INPUT_THREADS': input_threads})
        fill_threads = 1
        if self.filler.exception:
            fill_threads = 0
            logger.error('Fill thread error: {}'.format(
                self.filler.exception.message),
                extra={'MESSAGE_ID': 'exception'})
            self.filler = spawn(self.fill_resource_items_queue)
        logger.info('Filter threads {}'.format(fill_threads),
                    extra={'FILTER_THREADS': fill_threads})

        main_threads = self.workers_max - self.workers_pool.free_count()
        logger.info('Main threads {}'.format(main_threads),
                    extra={'MAIN_THREADS': main_threads})
        if len(self.workers_pool) < self.workers_min:
            for i in xrange(0, (self.workers_min - len(self.workers_pool))):
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.api_clients_info)
                self.workers_pool.add(w)
                logger.info('Watcher: Create main queue worker.')
                self.create_api_client()
        retry_threads = self.retry_workers_max -\
            self.retry_workers_pool.free_count()
        logger.info('Retry threads {}'.format(retry_threads),
                    extra={'RETRY_THREADS': retry_threads})
        if len(self.retry_workers_pool) < self.retry_workers_min:
            for i in xrange(0, self.retry_workers_min -
                            len(self.retry_workers_pool)):
                self.create_api_client()
                w = ResourceItemWorker.spawn(self.api_clients_queue,
                                             self.retry_resource_items_queue,
                                             self.db, self.workers_config,
                                             self.retry_resource_items_queue,
                                             self.api_clients_info)
                self.retry_workers_pool.add(w)
                logger.info('Watcher: Create retry queue worker.')

        # Log queues size and API clients count
        main_queue_size = self.resource_items_queue.qsize()
        logger.info('Resource items queue size {}'.format(main_queue_size),
                    extra={'MAIN_QUEUE_SIZE': main_queue_size})
        retry_queue_size = self.retry_resource_items_queue.qsize()
        logger.info(
            'Resource items retry queue size {}'.format(retry_queue_size),
            extra={'RETRY_QUEUE_SIZE': retry_queue_size})
        api_clients_count = len(self.api_clients_info)
        logger.info('API Clients count: {}'.format(api_clients_count),
                    extra={'API_CLIENTS': api_clients_count})

    def _calculate_st_dev(self, values):
        if len(values) > 0:
            avg = sum(values) * 1.0 / len(values)
            variance = map(lambda x: (x - avg) ** 2, values)
            avg_variance = sum(variance) * 1.0 / len(variance)
            st_dev = math.sqrt(avg_variance)
            return round(st_dev, 3)
        else:
            return 0

    def _mark_bad_clients(self, dev):
        # Mark bad api clients
        for cid, info in self.api_clients_info.items():
            if info.get('grown', False) and info['avg_duration'] > dev:
                info['drop_cookies'] = True
                self.create_api_client()
                logger.debug('Perfomance watcher: Mark client {} as bad, avg.'
                             ' request_duration is {} sec.'.format(
                                 cid, info['avg_duration']),
                             extra={'MESSAGE_ID': 'marked_as_bad'})
            elif info['avg_duration'] < dev and info['request_interval'] > 0:
                self.create_api_client()
                info['drop_cookies'] = True
                logger.debug('Perfomance watcher: Mark client {} as bad,'
                             ' request_interval is {} sec.'.format(
                                 cid, info['request_interval']),
                             extra={'MESSAGE_ID': 'marked_as_bad'})

    def perfomance_watcher(self):
        avg_duration, values = self._get_average_requests_duration()
        for _, info in self.api_clients_info.items():
            delta = timedelta(
                seconds=self.perfomance_window + self.watch_interval)
            current_date = datetime.now() - delta
            delete_list = []
            for key in info['request_durations']:
                if key < current_date:
                    delete_list.append(key)
            for k in delete_list:
                del info['request_durations'][k]
            delete_list = []
        st_dev = self._calculate_st_dev(values)
        if len(values) > 0:
            min_avg = min(values) * 1000
            max_avg = max(values) * 1000
        else:
            max_avg = 0
            min_avg = 0
        dev = round(st_dev + avg_duration, 3)
        logger.info('Perfomance watcher:\nREQUESTS_STDEV - {} sec.\n'
                    'REQUESTS_DEV - {} ms.\nREQUESTS_MIN_AVG - {} ms.\n'
                    'REQUESTS_MAX_AVG - {} ms.\nREQUESTS_AVG - {} sec.'.format(
                        round(st_dev, 3), dev, min_avg, max_avg, avg_duration),
                    extra={'REQUESTS_DEV': dev * 1000,
                           'REQUESTS_MIN_AVG': min_avg,
                           'REQUESTS_MAX_AVG': max_avg,
                           'REQUESTS_AVG': avg_duration * 1000})
        self._mark_bad_clients(dev)

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        self.input_queue_filler = spawn(self.fill_input_queue)
        self.filler = spawn(self.fill_resource_items_queue)
        spawn(self.queues_controller)
        while True:
            self.gevent_watcher()
            sleep(self.watch_interval)
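# Usage sketch (illustrative): config_get above reads the 'main' section; the
# values here are assumed examples, and keys from DEFAULTS/WORKER_CONFIG fall
# back to their defaults when omitted.
#
#     bridge = EdgeDataBridge({
#         'main': {
#             'resources_api_server': 'https://lb.api-sandbox.openprocurement.org',
#             'resources_api_version': '2.3',
#             'retrievers_params': {'up_wait_sleep': 30},
#         }
#     })
#     bridge.run()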
class ResourceFeederTestCase(unittest.TestCase):

    def setUp(self):
        self.response = munchify(json.loads("""
        {
            "next_page": {
                "path": "/api/0.10/tenders?offset=2015-12-25T18%3A04%3A36.264176%2B02%3A00",
                "uri": "https://lb.api-sandbox.openprocurement.org/api/0.10/tenders?offset=2015-12-25T18%3A04%3A36.264176%2B02%3A00",
                "offset": "2015-12-25T18:04:36.264176+02:00"
            },
            "prev_page": {
                "path": "/api/0.10/tenders?offset=2015-12-25T18%3A04%3A36.264176%2B02%3A00",
                "uri": "https://lb.api-sandbox.openprocurement.org/api/0.10/tenders?offset=2015-12-25T18%3A04%3A36.264176%2B02%3A00",
                "offset": "2015-12-25T18:04:36.264176+02:00"
            },
            "data": [
                {
                    "id": "823d50b3236247adad28a5a66f74db42",
                    "dateModified": "2015-11-13T18:50:00.753811+02:00"
                },
                {
                    "id": "f3849ade33534174b8402579152a5f41",
                    "dateModified": "2015-11-16T01:15:00.469896+02:00"
                },
                {
                    "id": "f3849ade33534174b8402579152a5f41",
                    "dateModified": "2015-11-16T12:00:00.960077+02:00"
                }
            ]
        }"""))

    def test_instance_initialization(self):
        self.resource_feeder = ResourceFeeder()
        self.assertEqual(self.resource_feeder.key, '')
        self.assertEqual(self.resource_feeder.host,
                         'https://lb.api-sandbox.openprocurement.org/')
        self.assertEqual(self.resource_feeder.version, '2.3')
        self.assertEqual(self.resource_feeder.resource, 'tenders')
        self.assertEqual(self.resource_feeder.adaptive, False)
        self.assertEqual(self.resource_feeder.extra_params,
                         {'opt_fields': 'status', 'mode': '_all_'})
        self.assertEqual(self.resource_feeder.retrievers_params,
                         {'down_requests_sleep': 5,
                          'up_requests_sleep': 1,
                          'up_wait_sleep': 30,
                          'up_wait_sleep_min': 5,
                          'queue_size': 101})
        self.assertIsInstance(self.resource_feeder.queue, Queue)
        self.assertEqual(self.resource_feeder.forward_info, {})
        self.assertEqual(self.resource_feeder.backward_info, {})

    def test_init_api_clients(self):
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.assertEqual(self.resource_feeder.backward_params,
                         {'descending': True, 'feed': 'changes',
                          'opt_fields': 'status', 'mode': '_all_'})
        self.assertEqual(self.resource_feeder.forward_params,
                         {'feed': 'changes', 'opt_fields': 'status',
                          'mode': '_all_'})
        self.assertEqual(self.resource_feeder.forward_info, {'status': 1})
        self.assertEqual(self.resource_feeder.backward_info, {'status': 1})
        self.assertEqual(self.resource_feeder.forward_client.session.cookies,
                         self.resource_feeder.backward_client.session.cookies)

    def test_handle_response_data(self):
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.handle_response_data(['tender1', 'tender2'])
        self.assertIn('tender1', list(self.resource_feeder.queue.queue))
        self.assertIn('tender2', list(self.resource_feeder.queue.queue))
        self.assertNotIn('tender3', list(self.resource_feeder.queue.queue))

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    def test_start_sync(self, mock_spawn, mock_sync_tenders):
        mock_spawn.return_value = 'spawn result'
        mock_sync_tenders.return_value = self.response
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.start_sync()
        self.assertEqual(self.resource_feeder.backward_params['offset'],
                         self.response.next_page.offset)
        self.assertEqual(self.resource_feeder.forward_params['offset'],
                         self.response.prev_page.offset)
        self.assertEqual(mock_spawn.call_count, 2)
        mock_spawn.assert_called_with(self.resource_feeder.retriever_forward)
        self.assertEqual(self.resource_feeder.backward_worker, 'spawn result')
        self.assertEqual(self.resource_feeder.forward_worker, 'spawn result')

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    def test_restart_sync(self, mock_spawn, mock_sync_tenders):
        mock_spawn.return_value = mock.MagicMock()
        mock_spawn.return_value.kill = mock.MagicMock('kill result')
        mock_sync_tenders.return_value = self.response
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.start_sync()
        self.resource_feeder.restart_sync()
        self.assertEqual(mock_spawn.return_value.kill.call_count, 2)

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    def test_get_resource_items_zero_value(self, mock_spawn,
                                           mock_sync_tenders):
        mock_sync_tenders.side_effect = [
            self.response,
            munchify({'data': {},
                      'next_page': {'offset': 'next_page'},
                      'prev_page': {'offset': 'next_page'}})
        ]
        mock_spawn.return_value = mock.MagicMock()
        mock_spawn.return_value.value = 0
        self.resource_feeder = ResourceFeeder()
        mock_spawn.return_value.ready.return_value = True
        with mock.patch('__builtin__.True', AlmostAlwaysTrue(4)):
            result = self.resource_feeder.get_resource_items()
        self.assertEqual(tuple(result), tuple(self.response.data))

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    def test_get_resource_items_non_zero_value(self, mock_spawn,
                                               mock_sync_tenders):
        mock_sync_tenders.side_effect = [
            self.response,
            munchify({'data': {},
                      'next_page': {'offset': 'next_page'},
                      'prev_page': {'offset': 'next_page'}})
        ]
        mock_spawn.return_value = mock.MagicMock()
        mock_spawn.return_value.value = 1
        self.resource_feeder = ResourceFeeder()
        mock_spawn.return_value.ready.side_effect = [True, False]
        with mock.patch('__builtin__.True', AlmostAlwaysTrue(4)):
            result = self.resource_feeder.get_resource_items()
        self.assertEqual(tuple(result), tuple(self.response.data))

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    @mock.patch('openprocurement_client.sync.sleep')
    def test_feeder_zero_value(self, mock_sleep, mock_spawn,
                               mock_sync_tenders):
        mock_sleep.return_value = 'sleeping'
        mock_sync_tenders.side_effect = [
            self.response, self.response, ConnectionError('conn error')
        ]
        self.resource_feeder = ResourceFeeder()
        mock_spawn.return_value = mock.MagicMock()
        mock_spawn.return_value.value = 0
        mock_spawn.return_value.ready.return_value = True
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.feeder()
        self.assertEqual(e.exception.message, 'conn error')
        self.assertEqual(mock_sleep.call_count, 1)

    @mock.patch('openprocurement_client.client.TendersClientSync.sync_tenders')
    @mock.patch('openprocurement_client.sync.spawn')
    @mock.patch('openprocurement_client.sync.sleep')
    def test_feeder(self, mock_sleep, mock_spawn, mock_sync_tenders):
        mock_sleep.return_value = 'sleeping'
        mock_sync_tenders.side_effect = [
            self.response, self.response, ConnectionError('conn error')
        ]
        self.resource_feeder = ResourceFeeder()
        mock_spawn.return_value = mock.MagicMock()
        mock_spawn.return_value.value = 1
        mock_spawn.return_value.ready.side_effect = [True, False, False, True]
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.feeder()
        self.assertEqual(e.exception.message, 'conn error')
        self.assertEqual(mock_sleep.call_count, 1)

    @mock.patch('openprocurement_client.sync.spawn')
    def test_run_feeder(self, mock_spawn):
        mock_spawn.return_value = mock.MagicMock()
        self.resource_feeder = ResourceFeeder()
        result = self.resource_feeder.run_feeder()
        mock_spawn.assert_called_with(self.resource_feeder.feeder)
        self.assertEqual(result, self.resource_feeder.queue)

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_backward(self, mock_get_response):
        mock_get_response.side_effect = [self.response, munchify({'data': {}})]
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.backward_params = {"limit": 0}
        self.resource_feeder.backward_client = mock.MagicMock()
        self.resource_feeder.cookies = \
            self.resource_feeder.backward_client.session.cookies
        self.resource_feeder.retriever_backward()
        self.assertEqual(self.resource_feeder.backward_params['offset'],
                         self.response.next_page.offset)
        self.assertEqual(
            self.resource_feeder.backward_info['resource_item_count'], 0)
        self.assertEqual(self.resource_feeder.backward_info['status'], 0)

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_backward_wrong_cookies(self, mock_get_response):
        mock_get_response.return_value = self.response
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.backward_params = {"limit": 0}
        self.resource_feeder.backward_client = mock.MagicMock()
        self.resource_feeder.cookies = mock.MagicMock()
        self.resource_feeder.backward_client.session.cookies = \
            mock.MagicMock()
        with self.assertRaises(Exception) as e:
            self.resource_feeder.retriever_backward()
        self.assertEqual(e.exception.message, 'LB Server mismatch')

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_forward_wrong_cookies(self, mock_get_response):
        mock_get_response.return_value = self.response
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.backward_params = {"limit": 0}
        self.resource_feeder.backward_client = mock.MagicMock()
        self.resource_feeder.cookies = mock.MagicMock()
        self.resource_feeder.backward_client.session.cookies = \
            mock.MagicMock()
        with self.assertRaises(Exception) as e:
            self.resource_feeder.retriever_forward()
        self.assertEqual(e.exception.message, 'LB Server mismatch')

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_forward(self, mock_get_response):
        mock_get_response.side_effect = [
            self.response, self.response, ConnectionError('connection error')
        ]
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.forward_params = {"limit": 0}
        self.resource_feeder.forward_client = mock.MagicMock()
        self.resource_feeder.forward_client.session.cookies = \
            self.resource_feeder.cookies
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.retriever_forward()
        self.assertEqual(e.exception.message, 'connection error')
        self.assertEqual(self.resource_feeder.forward_params['offset'],
                         self.response.next_page.offset)
        self.assertEqual(
            self.resource_feeder.forward_info['resource_item_count'], 3)
        self.assertEqual(self.resource_feeder.forward_info['status'], 3)

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_forward_no_data(self, mock_get_response):
        mock_get_response.side_effect = [
            munchify({'data': {}, 'next_page': {'offset': 'next_page'}}),
            ConnectionError('connection error')
        ]
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.init_api_clients()
        self.resource_feeder.forward_params = {"limit": 0}
        self.resource_feeder.forward_client = mock.MagicMock()
        self.resource_feeder.forward_client.session.cookies = \
            self.resource_feeder.cookies
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.retriever_forward()
        self.assertEqual(e.exception.message, 'connection error')
        self.assertEqual(self.resource_feeder.forward_params['offset'],
                         'next_page')
        self.assertEqual(
            self.resource_feeder.forward_info['resource_item_count'], 0)
        self.assertEqual(self.resource_feeder.forward_info['status'], 3)

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_forward_adaptive(self, mock_get_response):
        mock_get_response.side_effect = [
            self.response,
            munchify({'data': {}, 'next_page': {'offset': 'next_page'}}),
            self.response,
            ConnectionError('connection error')
        ]
        self.resource_feeder = ResourceFeeder(adaptive=True)
        self.resource_feeder.init_api_clients()
        self.resource_feeder.forward_params = {"limit": 0}
        self.resource_feeder.forward_client = mock.MagicMock()
        self.resource_feeder.forward_client.session.cookies = \
            self.resource_feeder.cookies
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.retriever_forward()
        self.assertEqual(e.exception.message, 'connection error')
        self.assertEqual(self.resource_feeder.forward_params['offset'],
                         self.response.next_page.offset)
        self.assertEqual(
            self.resource_feeder.forward_info['resource_item_count'], 3)
        self.assertEqual(self.resource_feeder.forward_info['status'], 3)

    @mock.patch('openprocurement_client.sync.get_response')
    def test_retriever_forward_no_data_adaptive(self, mock_get_response):
        mock_get_response.side_effect = [
            munchify({'data': {}, 'next_page': {'offset': 'next_page'}}),
            munchify({'data': {}, 'next_page': {'offset': 'next_page'}}),
            ConnectionError('connection error')
        ]
        self.resource_feeder = ResourceFeeder(
            adaptive=True,
            retrievers_params={'down_requests_sleep': 5,
                               'up_requests_sleep': 1,
                               'up_wait_sleep': 15,
                               'up_wait_sleep_min': 5,
                               'queue_size': 101})
        self.resource_feeder.init_api_clients()
        self.resource_feeder.forward_params = {"limit": 0}
        self.resource_feeder.forward_client = mock.MagicMock()
        self.resource_feeder.forward_client.session.cookies = \
            self.resource_feeder.cookies
        with self.assertRaises(ConnectionError) as e:
            self.resource_feeder.retriever_forward()
        self.assertEqual(e.exception.message, 'connection error')
        self.assertEqual(self.resource_feeder.forward_params['offset'],
                         'next_page')
        self.assertEqual(
            self.resource_feeder.forward_info['resource_item_count'], 0)
        self.assertEqual(self.resource_feeder.forward_info['status'], 3)

    @mock.patch(
        'openprocurement_client.sync.ResourceFeeder.get_resource_items')
    def test_get_resource_items(self, mock_get_resource_items):
        mock_get_resource_items.return_value = 'feeder_instance'
        result = get_resource_items(resource='tenders')
        self.assertEqual(result, 'feeder_instance')
        self.assertEqual(mock_get_resource_items.call_count, 1)

    @mock.patch('openprocurement_client.sync.get_resource_items')
    def test_get_tenders(self, mock_get_resource_items):
        mock_get_resource_items.return_value = 'get_resource_items_call'
        result = get_tenders()
        self.assertEqual(result, 'get_resource_items_call')
        self.assertEqual(mock_get_resource_items.call_count, 1)
    def test_handle_response_data(self):
        self.resource_feeder = ResourceFeeder()
        self.resource_feeder.handle_response_data(['tender1', 'tender2'])
        self.assertIn('tender1', list(self.resource_feeder.queue.queue))
        self.assertIn('tender2', list(self.resource_feeder.queue.queue))
        self.assertNotIn('tender3', list(self.resource_feeder.queue.queue))
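# Usage sketch for the module-level helpers exercised above (illustrative
# only; `handle_item` is a hypothetical consumer callback, not part of this
# module). `get_tenders()` delegates to `get_resource_items()`, which runs a
# ResourceFeeder and yields items off its internal queue, so client code
# simply iterates:
#
#     from openprocurement_client.sync import get_tenders
#
#     for tender in get_tenders():
#         handle_item(tender)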
class EdgeDataBridge(object):
    """Edge Data Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.workers_config = {}
        self.log_dict = {}
        self.bridge_id = uuid.uuid4().hex
        self.api_host = self.config_get('resources_api_server')
        self.api_version = self.config_get('resources_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        # Check up_wait_sleep
        up_wait_sleep = self.retrievers_params.get('up_wait_sleep')
        if up_wait_sleep is not None and up_wait_sleep < 30:
            raise DataBridgeConfigError(
                'Invalid \'up_wait_sleep\' in \'retrievers_params\'. '
                'Value must be greater than or equal to 30.')

        # Workers settings
        for key in WORKER_CONFIG:
            self.workers_config[key] = (self.config_get(key) or
                                        WORKER_CONFIG[key])

        # Init config
        for key in DEFAULTS:
            setattr(self, key, self.config_get(key) or DEFAULTS[key])

        # Pools
        self.workers_pool = gevent.pool.Pool(self.workers_max)
        self.retry_workers_pool = gevent.pool.Pool(self.retry_workers_max)
        self.filter_workers_pool = gevent.pool.Pool(self.filter_workers_count)

        # Queues (size -1 selects an unbounded queue)
        if self.input_queue_size == -1:
            self.input_queue = Queue()
        else:
            self.input_queue = Queue(self.input_queue_size)
        if self.resource_items_queue_size == -1:
            self.resource_items_queue = Queue()
        else:
            self.resource_items_queue = Queue(self.resource_items_queue_size)
        self.api_clients_queue = Queue()
        # self.retry_api_clients_queue = Queue()
        if self.retry_resource_items_queue_size == -1:
            self.retry_resource_items_queue = Queue()
        else:
            self.retry_resource_items_queue = Queue(
                self.retry_resource_items_queue_size)

        self.process = psutil.Process(os.getpid())

        # Default values for statistic variables
        for key in ('not_actual_docs_count', 'update_documents', 'droped',
                    'add_to_resource_items_queue', 'save_documents', 'skiped',
                    'add_to_retry', 'exceptions_count', 'not_found_count',
                    'timeshift', 'request_dev'):
            self.log_dict[key] = 0

        if self.api_host != '' and self.api_host is not None:
            api_host = urlparse(self.api_host)
            if api_host.scheme == '' and api_host.netloc == '':
                raise DataBridgeConfigError(
                    'Invalid \'resources_api_server\' url.')
        else:
            raise DataBridgeConfigError(
                'Empty or missing \'resources_api_server\' in config '
                'dictionary.')

        # self.db = prepare_couchdb(self.couch_url, self.db_name, logger)
        # db_url = self.couch_url + '/' + self.db_name
        # prepare_couchdb_views(db_url, self.workers_config['resource'],
        #                       logger)
        self.db = Elasticsearch()
        self.db.indices.create(index=self.db_name, ignore=400)
        # Raise the Elasticsearch mapping field limit (default 1000) to 4000
        settings = self.db.indices.get_settings(
            index=self.db_name, name='index.mapping.total_fields.limit')
        if settings.get(self.db_name, {}).get(u'settings', {}).get(
                u'index', {}).get(u'mapping', {}).get(
                u'total_fields', {}).get(u'limit', u'1000') != u'4000':
            self.db.indices.put_settings(
                body={'index.mapping.total_fields.limit': 4000},
                index=self.db_name)
        self.db.index_get = partial(self.db.get, index=self.db_name)
        self.db.index_bulk = partial(self.db.bulk, index=self.db_name)

        collector_config = {
            'main': {
                'storage': 'couchdb',
                'couch_url': self.couch_url,
                'log_db': self.log_db_name
            }
        }
        # self.server = Server(self.couch_url,
        #                      session=Session(retry_delays=range(10)))
        # self.logger = LogsCollector(collector_config)
        self.view_path = '_design/{}/_view/by_dateModified'.format(
            self.workers_config['resource'])
        extra_params = {
            'mode': self.retrieve_mode,
            'limit': self.resource_items_limit
        }
        self.feeder = ResourceFeeder(
            host=self.api_host, version=self.api_version, key='',
            resource=self.workers_config['resource'],
            extra_params=extra_params,
            retrievers_params=self.retrievers_params, adaptive=True)
        self.api_clients_info = {}
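# A minimal config sketch for EdgeDataBridge (values and the 'main' section
# are illustrative assumptions: config_get is presumed to read from
# config['main'], as AuctionsDataBridge does; the host URL is a placeholder).
# Note that 'up_wait_sleep' must be >= 30 or DataBridgeConfigError is raised:
#
#     config = {
#         'main': {
#             'resources_api_server': 'https://api.example.com',
#             'resources_api_version': '2.4',
#             'retrievers_params': {
#                 'down_requests_sleep': 5,
#                 'up_requests_sleep': 1,
#                 'up_wait_sleep': 30,
#                 'queue_size': 101
#             }
#         }
#     }
#     bridge = EdgeDataBridge(config)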