def setUp(self):
    self.original_source_host_list = settings.settings['remote_source_host_list']
    self.original_source_folder = settings.settings['remote_source_folder']
    self.logger = get_logger(PROCESS_UNIT_TEST)
    self.worker = self.create_file_collector()
    self.actual_timeperiod = self.ACTUAL_TIMEPERIOD

    # switch off auxiliary threads
    self.worker.performance_tracker.cancel()
    self.worker._create_directories()

    self.tempdir_copying = tempfile.mkdtemp()
    fqsf = os.path.join(self.tempdir_copying, self.actual_timeperiod[:-2])
    if not os.path.exists(fqsf):
        os.makedirs(fqsf)
    fqhf = os.path.join(self.tempdir_copying, AbstractFileCollectorWorker.HEADER_FOLDER)
    if not os.path.exists(fqhf):
        os.makedirs(fqhf)

    for file_name in self.TEST_FILE_LIST:
        with open(os.path.join(fqsf, file_name), 'w') as output:
            output.write(string_generator(self.TEST_FILE_SIZE))

    for file_name in self.TEST_HEADER_LIST:
        with open(os.path.join(fqhf, file_name), 'w') as output:
            output.write(','.join(['column_{0}'.format(x) for x in range(5)]))

    settings.settings['remote_source_host_list'] = {socket.getfqdn(): ''}
    settings.settings['remote_source_folder'] = self.tempdir_copying
def setUp(self):
    self.process_name = PROCESS_ALERT_DAILY
    self.logger = get_logger(self.process_name)
    self.uow_id = create_and_insert_unit_of_work(self.process_name, 'range_start', 'range_end')
    self.uow_id = str(self.uow_id)
    self.uow_dao = UnitOfWorkDao(self.logger)
    self.uow_log_dao = UowLogDao(self.logger)
def reset_db(): """ drops the *scheduler* database, resets schema """ logger = get_logger(PROCESS_SCHEDULER) logger.info('Starting *scheduler* DB reset') ds = ds_manager.ds_factory(logger) ds._db_client.drop_database(settings.settings['mongo_db_name']) logger.info('*scheduler* db has been dropped') connection = ds.connection(COLLECTION_MANAGED_PROCESS) connection.create_index([(PROCESS_NAME, pymongo.ASCENDING)], unique=True) connection = ds.connection(COLLECTION_FREERUN_PROCESS) connection.create_index([(PROCESS_NAME, pymongo.ASCENDING), (ENTRY_NAME, pymongo.ASCENDING)], unique=True) connection = ds.connection(COLLECTION_UNIT_OF_WORK) connection.create_index([(PROCESS_NAME, pymongo.ASCENDING), (TIMEPERIOD, pymongo.ASCENDING), (START_OBJ_ID, pymongo.ASCENDING), (END_OBJ_ID, pymongo.ASCENDING)], unique=True) connection = ds.connection(COLLECTION_UOW_LOG) connection.create_index([(RELATED_UNIT_OF_WORK, pymongo.ASCENDING)], unique=True) # expireAfterSeconds: <int> Used to create an expiring (TTL) collection. # MongoDB will automatically delete documents from this collection after <int> seconds. # The indexed field must be a UTC datetime or the data will not expire. ttl_seconds = settings.settings['uow_log_ttl_days'] * 86400 # number of seconds for TTL connection.create_index(CREATED_AT, expireAfterSeconds=ttl_seconds) for collection_name in [COLLECTION_JOB_HOURLY, COLLECTION_JOB_DAILY, COLLECTION_JOB_MONTHLY, COLLECTION_JOB_YEARLY]: connection = ds.connection(collection_name) connection.create_index([(PROCESS_NAME, pymongo.ASCENDING), (TIMEPERIOD, pymongo.ASCENDING)], unique=True) logger.info('*scheduler* db has been recreated')
def __init__(self, process_name): """ renames process to SynergyYYY and creates PID file """ self.process_name = process_name self.logger = get_logger(process_name) # process-related activities setproctitle.setproctitle(settings.settings['process_prefix'] + self.process_name) create_pid_file(self.process_name)
def setUp(self):
    self.process_name = PROCESS_ALERT_DAILY
    self.logger = get_logger(self.process_name)
    self.uow_id = create_and_insert_unit_of_work(self.process_name, 'range_start', 'range_end')
    self.uow_id = str(self.uow_id)
    self.uow_dao = UnitOfWorkDao(self.logger)
    self.log_recording_dao = LogRecordingDao(self.logger)
def create_and_insert_unit_of_work(process_name, start_id, end_id, state=unit_of_work.STATE_REQUESTED,
                                   timeperiod='INVALID_TIMEPERIOD'):
    """ creates a unit_of_work and inserts it into the DB
        :return: id of the created object in the DB """
    uow = create_unit_of_work(process_name, start_id, end_id, timeperiod, state)
    logger = get_logger(process_name)
    uow_dao = UnitOfWorkDao(logger)
    uow_id = uow_dao.insert(uow)
    return uow_id
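# Usage sketch (illustrative only, not one of the fixtures above): shows the insert/remove
# life cycle that the setUp and reprocessing-candidates snippets rely on. PROCESS_ALERT_DAILY
# and the 'range_start'/'range_end' ids are reused from those snippets.
logger = get_logger(PROCESS_ALERT_DAILY)
uow_dao = UnitOfWorkDao(logger)

uow_id = create_and_insert_unit_of_work(PROCESS_ALERT_DAILY, 'range_start', 'range_end')
try:
    pass  # exercise the code under test against the inserted unit_of_work
finally:
    uow_dao.remove(uow_id)  # clean up, as the reprocessing-candidates test does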
def __init__(self, scheduler):
    self.logger = get_logger(PROCESS_GC, append_to_console=False, redirect_stdstream=False)
    self.managed_handlers = scheduler.managed_handlers
    self.mq_transmitter = MqTransmitter(self.logger)
    self.timetable = scheduler.timetable

    self.lock = Lock()
    self.uow_dao = UnitOfWorkDao(self.logger)
    self.reprocess_uows = collections.defaultdict(PriorityQueue)
    self.timer = RepeatTimer(settings.settings['gc_run_interval'], self._run)
def test_select_reprocessing_candidates(self):
    logger = get_logger(PROCESS_UNIT_TEST)
    uow_dao = UnitOfWorkDao(logger)

    try:
        initial_candidates = uow_dao.get_reprocessing_candidates()
    except:
        initial_candidates = []

    try:
        initial_positive_candidates = uow_dao.get_reprocessing_candidates('2010123123')
    except:
        initial_positive_candidates = []

    positive_timeperiods = {u'2010123123': PROCESS_SITE_HOURLY,   # hourly time qualifier
                            u'2010123100': PROCESS_SITE_DAILY,    # daily time qualifier
                            u'2010120000': PROCESS_SITE_MONTHLY,  # monthly time qualifier
                            u'2010000000': PROCESS_SITE_YEARLY}   # yearly time qualifier

    negative_timeperiods = {u'2009123123': PROCESS_SITE_HOURLY,   # hourly time qualifier
                            u'2009123100': PROCESS_SITE_DAILY,    # daily time qualifier
                            u'2009120000': PROCESS_SITE_MONTHLY,  # monthly time qualifier
                            u'2009000000': PROCESS_SITE_YEARLY}   # yearly time qualifier

    all_timeperiods = dict()
    all_timeperiods.update(positive_timeperiods)
    all_timeperiods.update(negative_timeperiods)

    created_uow = []
    for timeperiod, process_name in all_timeperiods.items():
        created_uow.append(create_and_insert_unit_of_work(process_name, 0, 1,
                                                          timeperiod=timeperiod,
                                                          state=unit_of_work.STATE_INVALID))

    candidates = uow_dao.get_reprocessing_candidates('2010123123')
    self.assertEqual(len(candidates) - len(initial_positive_candidates), len(positive_timeperiods))

    candidates = uow_dao.get_reprocessing_candidates()
    self.assertEqual(len(candidates) - len(initial_candidates), len(all_timeperiods))

    for uow_id in created_uow:
        uow_dao.remove(uow_id)
def setUp(self):
    self.logger = get_logger(PROCESS_UNIT_TEST)
    self.time_table_mocked = mock.create_autospec(Timetable)
    self.uow_dao_mocked = mock.create_autospec(UnitOfWorkDao)
    self.job_dao_mocked = mock.create_autospec(JobDao)

    self.sm_real = AbstractStateMachine(self.logger, self.time_table_mocked, 'AbstractStateMachine')
    self.sm_real.uow_dao = self.uow_dao_mocked
    self.sm_real.job_dao = self.job_dao_mocked

    self.original_time_grouping = {
        process_name: context.process_context[process_name].time_grouping
        for process_name in [PROCESS_SITE_HOURLY, PROCESS_SITE_DAILY,
                             PROCESS_SITE_MONTHLY, PROCESS_SITE_YEARLY]
    }
def __init__(self, logger=None, box_id=None):
    super(SupervisorConfigurator, self).__init__()
    self.logger = logger if logger else get_logger(PROCESS_SUPERVISOR, append_to_console=True)
    self.box_id = box_id if box_id else get_box_id(self.logger)
    self.bc_dao = BoxConfigurationDao(self.logger)

    self.process_map = dict()
    for process_name in context.process_context:
        try:
            self.process_map[process_name] = SupervisorEntry(process_name)
        except ValueError:
            continue
def update_db():
    """ writes process entries from context.process_context to the managed_process table """
    logger = get_logger(PROCESS_SCHEDULER)
    managed_process_dao = ManagedProcessDao(logger)
    managed_process_dao.clear()

    for process_name, process_entry in context.process_context.items():
        if not isinstance(process_entry, ManagedProcessEntry):
            continue

        managed_process_dao.update(process_entry)
        logger.info('Updated DB with process entry {0} from the context.'.format(process_entry.key))
def __init__(self):
    super(SupervisorConfigurator, self).__init__()
    self.logger = get_logger(PROCESS_SUPERVISOR, True)
    self.bc_dao = BoxConfigurationDao(self.logger)
    self.box_id = get_box_id(self.logger)

    self.process_map = dict()
    for process_name in context.process_context:
        try:
            self.process_map[process_name] = SupervisorEntry(process_name)
        except ValueError:
            continue
def __init__(self, mbean):
    local.application = self
    self.mx_thread = None
    self.mbean = mbean
    jinja_env.globals['mbean'] = mbean
    self.dispatch = SharedDataMiddleware(self.dispatch, {'/static': STATIC_PATH})

    # during the get_logger call a 'werkzeug' logger will be created;
    # later, werkzeug._internal.py -> _log() will assign the logger to the global _logger variable
    self.logger = get_logger(PROCESS_MX)
def setUp(self):
    self.logger = get_logger(PROCESS_UNIT_TEST)
    self.time_table_mocked = mock.create_autospec(Timetable)
    self.job_dao_mocked = mock.create_autospec(JobDao)
    self.uow_dao_mocked = mock.create_autospec(UnitOfWorkDao)
    self.ds_mocked = mock.create_autospec(BaseManager)

    self.sm_real = StateMachineContinuous(self.logger, self.time_table_mocked)
    self.sm_real.uow_dao = self.uow_dao_mocked
    self.sm_real.job_dao = self.job_dao_mocked
    self.sm_real.ds = self.ds_mocked
    self.sm_real.update_job = mock.Mock(side_effect=self.sm_real.update_job)
    self.sm_real._process_state_final_run = mock.Mock(side_effect=self.sm_real._process_state_final_run)
    self.sm_real._process_state_in_progress = mock.Mock(side_effect=self.sm_real._process_state_in_progress)
def setUp(self):
    self.logger = get_logger(PROCESS_UNIT_TEST)
    self.time_table_mocked = mock.create_autospec(Timetable)
    self.job_dao_mocked = mock.create_autospec(JobDao)
    self.uow_dao_mocked = mock.create_autospec(UnitOfWorkDao)
    self.ds_mocked = mock.create_autospec(BaseManager)

    self.sm_real = StateMachineDiscrete(self.logger, self.time_table_mocked)
    self.sm_real.uow_dao = self.uow_dao_mocked
    self.sm_real.job_dao = self.job_dao_mocked
    self.sm_real.ds = self.ds_mocked
    self.sm_real._StateMachineDiscrete__process_non_finalizable_job = mock.Mock()
    self.sm_real._StateMachineDiscrete__process_finalizable_job = mock.Mock()
    self.sm_real._process_state_in_progress = mock.Mock(side_effect=self.sm_real._process_state_in_progress)
def setUp(self):
    self.logger = get_logger(PROCESS_UNIT_TEST)
    self.time_table_mocked = mock.create_autospec(Timetable)
    self.job_dao_mocked = mock.create_autospec(JobDao)
    self.uow_dao_mocked = mock.create_autospec(UnitOfWorkDao)
    self.ds_mocked = mock.create_autospec(BaseManager)

    self.sm_real = StateMachineRecomputing(self.logger, self.time_table_mocked)
    self.sm_real.uow_dao = self.uow_dao_mocked
    self.sm_real.job_dao = self.job_dao_mocked
    self.sm_real.ds = self.ds_mocked
    self.sm_real.update_job = mock.Mock(side_effect=self.sm_real.update_job)
    self.sm_real._process_state_final_run = mock.Mock(side_effect=self.sm_real._process_state_final_run)
    self.sm_real._process_state_in_progress = mock.Mock(side_effect=self.sm_real._process_state_in_progress)
    self.sm_real._compute_and_transfer_to_final_run = \
        mock.Mock(side_effect=self.sm_real._compute_and_transfer_to_final_run)
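# Generic sketch (plain unittest.mock, not project code) of the wrapping pattern used in the
# state-machine setUps above: mock.Mock(side_effect=real_method) keeps the original behaviour
# while recording each call for later assertions.
from unittest import mock


class Worker(object):
    def step(self, x):
        return x + 1


worker = Worker()
worker.step = mock.Mock(side_effect=worker.step)
assert worker.step(1) == 2              # the real logic still runs
worker.step.assert_called_once_with(1)  # and the call is recorded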
def create_site_stats(collection_name, time_qualifier, seed='RANDOM_SEED_OBJECT'):
    logger = get_logger(PROCESS_UNIT_TEST)
    site_dao = SiteDao(logger)
    rnd = MT19937(seed)
    object_ids = []

    for i in range(TOTAL_ENTRIES):
        key = generate_site_composite_key(i, time_qualifier)
        site_stat = SiteStatistics()
        site_stat.key = key
        site_stat.stat.number_of_visits = rnd.extract_number()
        site_stat.stat.total_duration = rnd.extract_number()

        site_stat.stat.os = _generate_entries('os_', 5, i)
        site_stat.stat.browsers = _generate_entries('browser_', 5, i)
        site_stat.stat.screen_res = {'(320, 240)': 3, '(640, 480)': 5, '(1024, 960)': 7, '(1280, 768)': 9}
        site_stat.stat.languages = {'ca_en': 3, 'ca_fr': 5, 'ua_uk': 7, 'us_en': 9}
        site_stat.stat.countries = {'ca': 3, 'fr': 5, 'uk': 7, 'us': 9}

        stat_id = site_dao.insert(collection_name, site_stat)
        object_ids.append(stat_id)

    return object_ids
def reset_db(): """ drops *synergy.flow* tables and re-creates them """ logger = get_logger(PROCESS_SCHEDULER) logger.info('Starting *synergy.flow* tables reset') ds = ds_manager.ds_factory(logger) ds._db.drop_collection(COLLECTION_STEP) ds._db.drop_collection(COLLECTION_FLOW) connection = ds.connection(COLLECTION_STEP) connection.create_index([(FLOW_NAME, pymongo.ASCENDING), (STEP_NAME, pymongo.ASCENDING), (TIMEPERIOD, pymongo.ASCENDING)], unique=True) connection = ds.connection(COLLECTION_FLOW) connection.create_index([(FLOW_NAME, pymongo.ASCENDING), (TIMEPERIOD, pymongo.ASCENDING)], unique=True) logger.info('*synergy.flow* tables have been recreated')
def create_session_stats(composite_key_function, seed='RANDOM_SEED_OBJECT'):
    logger = get_logger(PROCESS_UNIT_TEST)
    ss_dao = SingleSessionDao(logger)
    time_array = ['20010303102210', '20010303102212', '20010303102215', '20010303102250']
    rnd = MT19937(seed)
    object_ids = []

    for i in range(TOTAL_ENTRIES):
        session = SingleSession()
        session.key = composite_key_function(i, TOTAL_ENTRIES)
        session.ip = '192.168.0.2'

        if i % 3 == 0:
            session.user_profile.screen_res = (240, 360)
        elif i % 5 == 0:
            session.user_profile.screen_res = (360, 480)
        else:
            session.user_profile.screen_res = (760, 980)

        if i % 2 == 0:
            session.user_profile.os = 'Linux'
            session.user_profile.browser = 'FF {0}'.format(i % 4)
            session.user_profile.language = 'en_ca'
            session.user_profile.country = 'ca'
        else:
            session.user_profile.os = 'Windows'
            session.user_profile.browser = 'IE {0}'.format(i % 9)
            session.user_profile.language = 'ua_uk'
            session.user_profile.country = 'eu'

        session.browsing_history.total_duration = rnd.extract_number()
        session.browsing_history.number_of_pageviews = rnd.extract_number()

        for index in range(4):
            session.browsing_history.number_of_entries = index + 1
            session.browsing_history.set_entry_timestamp(index, time_array[index])

        sess_id = ss_dao.update(session)
        object_ids.append(sess_id)

    return object_ids
def __init__(self, process_name):
    if process_name not in context.process_context:
        raise ValueError('SupervisorEntry: process {0} is not found in process_context. Aborting'
                         .format(process_name))

    self.logger = get_logger(PROCESS_SUPERVISOR, True)
    self.process_name = process_name
    self.re_boxes = context.process_context[process_name].present_on_boxes
    self.re_boxes = self.re_boxes if self.re_boxes else list()
    self.re_co_boxes = []  # compiled RE of boxes where the process should be present

    for re_box in self.re_boxes:
        try:
            if isinstance(re_box, string_types):
                re_box = re_box.lower()
                self.re_co_boxes.append(re.compile(re_box))
            elif isinstance(re_box, int):
                self.re_co_boxes.append(re_box)
            else:
                raise ValueError('SupervisorEntry supports (string, integer) values. Type {0} is unsupported'
                                 .format(type(re_box)))
        except TypeError:
            self.logger.warning('SupervisorEntry compilation error for {0}'.format(re_box))
def synch_db():
    """ function reads managed_process and updates context entries appropriately """
    logger = get_logger(PROCESS_SCHEDULER)
    managed_process_dao = ManagedProcessDao(logger)

    try:
        process_entries = managed_process_dao.get_all()
    except LookupError:
        logger.error('Synergy DB is not initialized. Aborting.')
        exit(1)

    for process_entry in process_entries:
        process_name = process_entry.process_name
        if process_name not in context.process_context:
            logger.warning('Process {0} has no reflection in the context. Skipping it.'.format(process_name))
            continue

        if not isinstance(context.process_context[process_name], ManagedProcessEntry):
            logger.error('Process entry {0} of non-managed type {1} found in managed_process table. Skipping it.'
                         .format(process_name, context.process_context[process_name].__class__.__name__))
            continue

        context.process_context[process_name] = process_entry
        logger.info('Context updated with process entry {0}.'.format(process_entry.key))
def clean_session_entries():
    logger = get_logger(PROCESS_UNIT_TEST)
    ss_dao = SingleSessionDao(logger)
    for i in range(base_fixtures.TOTAL_ENTRIES):
        key = generate_session_composite_key(i, base_fixtures.TOTAL_ENTRIES)
        ss_dao.remove(key)
def clean_site_entries(collection_name, time_qualifier):
    logger = get_logger(PROCESS_UNIT_TEST)
    site_dao = SiteDao(logger)
    for i in range(TOTAL_ENTRIES):
        key = generate_site_composite_key(i, time_qualifier)
        site_dao.remove(collection_name, key[0], key[1])
def setUp(self):
    super(TestPublishersPool, self).setUp()
    self.logger = get_logger(PROCESS_UNIT_TEST)