def run(self, stream):
    """Worker loop that listens to a remote stream until aborted or stopped.

    :param stream: dict with 'url', 'username' and 'password' keys
        describing the stream endpoint to scrape.

    Runs inside a Process (not a Thread) because StreamScraper.listen()
    blocks while streaming works; a process can be terminated externally.
    """
    abort = self.get_abort()
    stop = self.get_stop()
    settings = self.get_settings()
    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']

    def check_control():
        # Honor an abort request first (can be raised from the main thread
        # or another thread); otherwise honor a stop request, which is set
        # by the agent's WatchWorker thread.
        aborted = abort.is_aborted()
        self.running(not aborted)
        if not aborted:
            stopped = stop.is_stopped()
            self.running(not stopped)

    logger.trace(job_name, 'worker #' + str(self.get_id()) + " started.")
    while self.is_running():
        try:
            url = stream['url']
            username = stream['username']
            password = stream['password']
            auth = (username, password)
            # When streaming works the code blocks here.
            # This is the reason why we use a Process instead of a Thread:
            # a process can be terminated.
            StreamScraper.listen(connector, job_name, stream, url, auth)
            check_control()
            connector.close()
            logger.trace(job_name,
                         'worker #' + str(self.get_id()) + " executed.")
        # Was a bare "except:"; Exception keeps SystemExit/KeyboardInterrupt
        # able to terminate the worker.
        except Exception:
            check_control()
            result = Traceback.build()
            logger.fatal(job_name, result['message'], result['backtrace'])
            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, scheduled_event, test_mode=False):
    """Worker loop that fires repeating scheduled events.

    :param scheduled_event: dict holding at least 'hashcode'; the row is
        re-read from the database each iteration and supplies 'repetition',
        'intermission', 'expected', 'pdn_id' and 'e_pdn_id'.
    :param test_mode: when True the loop stops after a single iteration.

    Each pass: if the event's expected time has passed, advance it by its
    intermission until it lies in the future, persist the new expected
    time, and create + execute a process for this occurrence.
    """
    abort = self.get_abort()
    stop = self.get_stop()
    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']
    settings = self.get_settings()

    # Map the repetition code stored on the event to a timedelta keyword.
    # Unknown codes are not mapped and therefore never fire.
    units = {'HOURS': 'hours', 'DAYS': 'days', 'MINUTES': 'minutes'}

    def check_control():
        # Honor an abort request first (main thread or other thread);
        # otherwise honor a stop set by the agent's WatchWorker thread.
        aborted = abort.is_aborted()
        self.running(not aborted)
        if not aborted:
            stopped = stop.is_stopped()
            self.running(not stopped)

    while self.is_running():
        try:
            now = datetime.now()
            scheduled_event_repository = ScheduledEventRepository(connector)
            scheduled_event = scheduled_event_repository.find_by_hashcode(
                scheduled_event['hashcode'])
            job_repository = JobRepository(connector)
            event_repository = EventRepository(connector)
            repetition = scheduled_event['repetition']
            intermission = int(scheduled_event['intermission'])
            expected = datetime.strptime(scheduled_event['expected'],
                                         "%Y-%m-%d %H:%M:%S")

            # Step the expected timestamp forward by the intermission until
            # it lies in the future; None means "nothing due / unknown code".
            next_expected = None
            unit = units.get(repetition)
            if expected < now and unit:
                step = timedelta(**{unit: intermission})
                next_expected = expected + step
                while next_expected < now:
                    next_expected += step

            if next_expected:
                # Persist the new expected time on the scheduled event.
                message = {'id': scheduled_event['id'],
                           'expected': str(next_expected)}
                scheduled_event_repository.expected(message)
                # Create and launch the process handling this occurrence.
                job = job_repository.job()
                event_job_name = job['job_name']
                process = ProcessService.create_process(
                    connector, scheduled_event['pdn_id'], event_job_name,
                    scheduled_event['e_pdn_id'])
                event = {
                    'state': 'READY',
                    'repetition': repetition,
                    'intermission': intermission,
                    'expected': scheduled_event['expected'],
                    'job_name': event_job_name,
                    'pcs_id': process['id'],
                }
                event = event_repository.insert(event)
                process_property_repository = ProcessPropertyRepository(
                    connector)
                process_property_repository.set_property(
                    process, 'event_id', event['id'])
                ProcessService.execute_process(connector, process)

            check_control()
            # In test mode break the loop after a single pass.
            if test_mode:
                self.running(False)
            connector.close()
            time.sleep(settings['worker_idle_delay'])
            logger.trace(job_name,
                         'worker #' + str(self.get_id()) + " executed.")
        # Was a bare "except:"; Exception keeps SystemExit/KeyboardInterrupt
        # able to terminate the worker.
        except Exception:
            check_control()
            result = Traceback.build()
            logger.fatal(job_name, result['message'], result['backtrace'])
            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, queue):
    """Worker loop that dequeues messages and dispatches them to consumers.

    :param queue: identifier of the queue to consume from.

    Each pass claims the next message, loads the consumer class named in
    message['consumer'], runs its action, and marks the message PROCESSED.
    On failure the message (if any) is marked FAILED, best-effort.
    """
    abort = self.get_abort()
    claim = self.get_claim()
    stop = self.get_stop()
    settings = self.get_settings()
    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']
    message = None

    def check_control():
        # Honor an abort request first (main thread or other thread);
        # otherwise honor a stop set by the agent's WatchWorker thread.
        aborted = abort.is_aborted()
        self.running(not aborted)
        if not aborted:
            stopped = stop.is_stopped()
            self.running(not stopped)

    while self.is_running():
        try:
            message = None
            # Retrieve the next message, if any.
            message_repository = MessageRepository(connector)
            message = message_repository.dequeue(claim, queue)
            if message:
                consumer = ClassLoader.find(message['consumer'])
                consumer.action(connector, message)
                message_repository.state(queue, message, 'PROCESSED')
            check_control()
            # No message to process: release the connection and idle.
            if not message:
                connector.close()
                time.sleep(settings['worker_idle_delay'])
            logger.trace(job_name,
                         'worker #' + str(self.get_id()) + " executed.")
        # Was a bare "except:"; Exception keeps SystemExit/KeyboardInterrupt
        # able to terminate the worker.
        except Exception:
            check_control()
            result = Traceback.build()
            if message:
                # Best-effort: record the failure on the message itself.
                try:
                    message_repository = MessageRepository(connector)
                    result['id'] = message['id']
                    message_repository.state(queue, result, 'FAILED')
                except Exception:
                    logger.fatal(job_name,
                                 'Failed to persist message failure')
            logger.fatal(job_name, result['message'], result['backtrace'])
            connector.close()
            time.sleep(settings['worker_exception_delay'])
def run(self, location):
    """Worker loop that scans a (VFS) location for new stable files.

    :param location: dict with 'url', 'patterns', 'recursive' and 'depth'
        describing where and how to scan.

    Each pass lists the stable files at the location; every file not yet
    registered (by hashcode) is matched against the location's rules, a
    process is created and executed for it, and the file is registered.
    """
    abort = self.get_abort()
    stop = self.get_stop()
    paprika_ds = DatasourceBuilder.build('paprika-ds.json')
    connector = ConnectorFactory.create_connector(paprika_ds)
    logger = Logger(connector, self)
    job_repository = JobRepository(connector)
    job = job_repository.job()
    job_name = job['job_name']
    settings = self.get_settings()

    def check_control():
        # Honor an abort request first (main thread or other thread);
        # otherwise honor a stop set by the agent's WatchWorker thread.
        aborted = abort.is_aborted()
        self.running(not aborted)
        if not aborted:
            stopped = stop.is_stopped()
            self.running(not stopped)

    while self.is_running():
        try:
            properties = PropertyRepository(connector)
            registry = FileRepository(connector)
            rule_repository = RuleRepository(connector)
            excluded_extensions = properties.get_property(
                'scanner.excluded_extensions')
            stable_check_delay = properties.get_property(
                'scanner.stable_check_delay')
            url = location['url']
            patterns = location['patterns']
            client = VfsFactory.create_client(url)
            client.set_excluded_extensions(excluded_extensions)
            client.set_stable_check_delay(int(stable_check_delay))
            client.set_regular_expressions(patterns)
            path = client.get_path()
            recursive = int(location['recursive'])
            depth = int(location['depth'])
            client.connect()
            files = client.list_stable(path, recursive=recursive,
                                       depth=depth)
            for file in files:
                registered_file = registry.get_by_hashcode(file['hashcode'])
                if registered_file:
                    # Already registered: skip.
                    continue
                # Find the rule for this file. NOTE(review): the LAST
                # matching rule wins (no break) — confirm this is intended.
                found_rule = None
                rules = rule_repository.find_by_location(location)
                for rule in rules:
                    if Matcher.match(ReMethod, rule['pattern'],
                                     file['filename']):
                        found_rule = rule
                if not found_rule:
                    found_rule = rule_repository.find_failsafe()
                job = job_repository.job()
                file_job_name = job['job_name']
                # NOTE(review): "hascode:" typo kept verbatim — it is a
                # runtime log string.
                logger.info(
                    file_job_name,
                    "file: " + file['url'] + '/' + file['filename'] +
                    " rule: " + found_rule['rule'] + " hascode:" +
                    file['hashcode'])
                process = ProcessService.create_process(
                    connector, found_rule['pdn_id'], file_job_name,
                    found_rule['e_pdn_id'])
                message = {
                    'job_name': file_job_name,
                    'filename': file['filename'],
                    'path': file['path'],
                    'pattern': found_rule['pattern'],
                    'rle_id': found_rule['id'],
                    'rule': found_rule['rule'],
                    'pickup_location': file['url'],
                    'filesize': file['size'],
                    'hashcode': file['hashcode'],
                    'pcs_id': process['id'],
                    'state': 'READY',
                }
                registered_file = registry.insert(message)
                process_property_repository = ProcessPropertyRepository(
                    connector)
                process_property_repository.set_property(
                    process, 'file_id', registered_file['id'])
                process_property_repository.set_property(
                    process, 'pickup_location', file['url'])
                process_property_repository.set_property(
                    process, 'path', file['path'])
                process_property_repository.set_property(
                    process, 'payload',
                    json.dumps({
                        'filename': file['filename'],
                        'job_name': file_job_name
                    }))
                ProcessService.execute_process(connector, process)
            client.close()
            check_control()
            connector.close()
            time.sleep(settings['worker_idle_delay'])
            logger.trace(job_name,
                         'worker #' + str(self.get_id()) + " executed.")
        # Was a bare "except:"; Exception keeps SystemExit/KeyboardInterrupt
        # able to terminate the worker.
        except Exception:
            check_control()
            result = Traceback.build()
            logger.fatal(job_name, result['message'], result['backtrace'])
            connector.close()
            time.sleep(settings['worker_exception_delay'])