class NifStream:
    """Processes change messages and inserts or updates the corresponding
    object in the Lungo api.

    .. danger::
        Private members are also documented. :py:meth:`.run` and
        :py:meth:`.recover` are the publicly available methods.

    Handles the following NIF entity_types:

    * Person :py:class:`typings.person.Person`
    * Function :py:class:`typings.function.Function`
    * Organization :py:class:`typings.organization.Organization`
    * Competence :py:class:`typings.competence.Competence`
    * License :py:class:`typings.license.License`

    :py:meth:`.run` uses :py:meth:`pymongo.MongoClient.watch` context to read
    the oplog change stream and react on each insert. :py:meth:`.run` will
    check :py:attr:`.resume_token` in :py:attr:`.resume_token_path` and try to
    resume if it exists and is valid. :py:meth:`.run` will block forever or
    until :py:attr:`max_restarts` is reached.

    .. caution::
        If :py:attr:`resume_token` is out of date or mismatches the oplog, it
        will be deleted and resuming will not be possible.

    .. note::
        To address errors like shutdowns or an out-of-date
        :py:attr:`.resume_token`, the class implements :py:meth:`.recover` to
        automatically try to address the errors by getting change messages
        through :py:mod:`eve_api`.

    Usage::

        from stream import NifStream
        stream = NifStream()
        stream.recover(errors=False)  # fix remaining change messages
        stream.run()                  # Blocks forever
    """

    def __init__(self):
        self.log = AppLogger(name='nif-stream',
                             stdout=False,
                             last_logs=0,
                             restart=True)

        # Restart bookkeeping for the change-stream watcher.
        self.restarts = 0
        self.max_restarts = 10
        self.token_reset = False

        # Resume-token state; the lock is held (True) while recover() runs so
        # that replayed messages do not overwrite the stream's token on disk.
        self.resume_token = None
        self.resume_token_path = Path(STREAM_RESUME_TOKEN_FILE)
        self.resume_token_lock = False

        self.tz_local = tz.gettz("Europe/Oslo")
        self.tz_utc = tz.gettz('UTC')

        # Lungo Api: endpoint and id-field per NIF entity type.
        self.api_collections = {
            'Person': {
                'url': '{}/persons/process'.format(API_URL),
                'id': 'id'
            },
            'Function': {
                'url': '{}/functions/process'.format(API_URL),
                'id': 'id'
            },
            'Organization': {
                'url': '{}/organizations/process'.format(API_URL),
                'id': 'id'
            },
            'Competence': {
                'url': '{}/competences/process'.format(API_URL),
                'id': 'id'
            },
            'License': {
                'url': '{}/licenses/process'.format(API_URL),
                'id': 'id'
            },
            'Changes': {
                'url': '{}/integration/changes'.format(API_URL),
                'id': 'id'
            },
        }

        # NIF Api
        # Needs one of the clubs? Using platform user!
        self.api_license = NifApiIntegration(username=NIF_FEDERATION_USERNAME,
                                             password=NIF_FEDERATION_PASSWORD,
                                             log_file=STREAM_LOG_FILE,
                                             realm=NIF_REALM)
        self.api_competence = NifApiCompetence(
            username=NIF_FEDERATION_USERNAME,
            password=NIF_FEDERATION_PASSWORD,
            log_file=STREAM_LOG_FILE,
            realm=NIF_REALM)
        self.api = NifApiIntegration(username=ACLUBU,
                                     password=ACLUBP,
                                     log_file=STREAM_LOG_FILE,
                                     realm=NIF_REALM)

        status, hello = self.api._test()
        if status is not True:
            self.log.error('[TERMINATING] Problems with NIF authentication')
            # BUGFIX: exit non-zero on a fatal error so supervisors notice;
            # the original exited 0 which signals a clean shutdown.
            sys.exit(1)

        # Change stream
        client = pymongo.MongoClient()
        self.db = client.ka

    def recover(self, errors=False, realm=None):
        """Fetch change messages via :py:mod:`eve_api` and reprocess them.

        Gets change messages with status:

        * ``errors=True``: pending, error
        * ``errors=False``: ready

        and runs each through :py:meth:`._process_change`.

        Sets :py:attr:`.resume_token_lock` while running to avoid writing a
        :py:attr:`.resume_token` on updates.

        :param errors: recover 'pending'/'error' messages instead of 'ready'
        :type errors: bool
        :param realm: realm to recover; defaults to ``NIF_REALM``
        :type realm: str
        """
        # BUGFIX: bind the realm default at call time instead of at class
        # definition time (backward-compatible: default value is unchanged).
        if realm is None:
            realm = NIF_REALM
        r = False
        self.resume_token_lock = True

        if errors is True:
            try:
                r = requests.get(
                    '%s?where={"_status": {"$in": ["pending", "error"]},"_realm":"%s"}&max_results=100000'
                    % (self.api_collections['Changes']['url'], realm),
                    headers=API_HEADERS)
            except Exception as e:
                self.log.exception(
                    'Exception getting error and pending changes in recover')
        else:
            try:
                r = requests.get(
                    '%s?where={"_status": {"$in": ["ready"]},"_realm":"%s"}&max_results=100000'
                    % (self.api_collections['Changes']['url'], realm),
                    headers=API_HEADERS)
            except Exception as e:
                self.log.exception(
                    'Exception getting ready changes in recover')

        if r and r.status_code == 200:
            slow = r.json()
            for change in slow['_items']:
                self._process_change(ChangeStreamItem(change))

        self.resume_token_lock = False

    def _process_change(self, change) -> bool:
        """Process a single change message.

        Calls the NIF api getter matching the message's entity_type, then
        hands the payload to :py:meth:`._process`. The change message status
        is moved pending -> finished/error accordingly.

        :param change: the change message
        :type change: ChangeStreamItem
        :return: True on success
        :rtype: bool
        """
        status = False
        if change.set_status('pending'):
            try:
                # Get object from nif_api
                if change.entity_type == 'Person':
                    status, result = self.api.get_person(change.get_id())
                elif change.entity_type == 'Function':
                    status, result = self.api.get_function(change.get_id())
                elif change.entity_type == 'Organization':
                    status, result = self.api.get_organization(
                        change.get_id(), NLF_ORG_STRUCTURE)
                elif change.entity_type == 'License':
                    status, result = self.api_license.get_license(
                        change.get_id())
                elif change.entity_type == 'Competence':
                    status, result = self.api_competence.get_competence(
                        change.get_id())

                # Insert into Lungo api
                if status is True:
                    pstatus, pmessage = self._process(result, change)
                    if pstatus is True:
                        # Sets the change message status
                        change.set_status('finished')
                        return True
                    else:
                        change.set_status('error', pmessage)
                else:
                    self.log.error(
                        'NIF API error for {} ({}) change message: {}'.format(
                            change.entity_type, change.id, change._id))
                    change.set_status(
                        'error', 'Got http {} for {} {}'.format(
                            status, change.entity_type, change.get_id()))
            # Error in nif_api
            except Exception as e:
                self.log.exception('Error in process change')
                change.set_status('error', {'exception': str(e)})
        else:
            self.log.error('Cant change Person status to pending')
            raise Exception('Cant change Person status to pending')

        return False

    def run(self):
        """Read the mongo change stream.

        On all `insert` operations to `integration/changes` we retrieve the
        full document (change message) and act accordingly via
        :py:meth:`._process_change`.

        .. note::
            Starting with MongoDB 4.2, `startAfter` becomes available and can
            resume after errors, unlike the current behaviour of
            `resumeAfter`.
        """
        self.log.debug('[Stream started]')

        self._read_resume_token()
        if self.resume_token is not None:
            resume_after = {'_data': self.resume_token}
            self.log.debug('Got resume token')
        else:
            resume_after = None
            self.log.debug('No resume token')

        try:
            # @TODO on upgrade to mongo 4.2 use startAfter instead
            self.log.debug('Change stream watch starting...')
            with self.db.integration_changes.watch(
                    pipeline=[{
                        '$match': {
                            'operationType': 'insert'
                        }
                    }],
                    resume_after=resume_after) as stream:
                for change in stream:
                    if change['fullDocument']['_realm'] == NIF_REALM:
                        self.log.debug(
                            'Processing change message: {} {}'.format(
                                change['fullDocument']['entity_type'],
                                change['fullDocument']['id']))
                        # Always set new resume token
                        self.resume_token = change['_id']['_data']
                        if self._process_change(
                                ChangeStreamItem(
                                    change['fullDocument'])) is True:
                            self.log.debug('Successfully processed')
                            # Persist the token and reset the restart counter
                            # only after a successful round trip.
                            self._write_resume_token()
                            self.restarts = 0
        except pymongo.errors.PyMongoError as e:
            self.log.error('Unrecoverable PyMongoError, restarting')
            self.restarts += 1
            if self.restarts > self.max_restarts:
                self.log.error('Too many retries')
                pass
            else:
                self.run()
        except Exception as e:
            self.log.exception('Unknown error in change stream watch')
            self.restarts += 1
            if self.restarts > self.max_restarts:
                self.log.error('Too many restarts: {}'.format(self.restarts))
                if not self.token_reset:
                    # Token might be the culprit; reset it once and retry.
                    self.log.error('Resetting resume token')
                    self._reset_token()
                    self.run()
            elif not self.token_reset:
                self.run()

    def _merge_dicts(self, x: dict, y: dict) -> dict:
        """Simple and safe merge of two dictionaries (y wins on conflicts).

        :param x: first dictionary
        :type x: dict
        :param y: second dictionary
        :type y: dict
        :return: merged dict z
        :rtype: dict
        """
        z = x.copy()  # start with x's keys and values
        z.update(y)  # modifies z with y's keys and values & returns None
        return z

    def _merge_user_to(self, id, merged):
        """Update all persons in api which ``id`` is merged from.

        .. note::
            NIF describes ``merged_from`` which makes no sense. For the api
            to be able to hand out a 301, each object merged to ``id`` needs
            a ``merged_to: id``. This can be acted upon and resolves
            unlimited levels of merging.

        :param id: Person id
        :type id: int
        :param merged: List of person id's merged from to id
        :type merged: list[int]
        :return: None
        :rtype: None
        """
        if len(merged) > 0:
            for m in merged:
                u = requests.get('%s/%s' %
                                 (self.api_collections['Person']['url'], m),
                                 headers=API_HEADERS)
                if u.status_code == 200:
                    u_json = u.json()
                    u_u = requests.patch(
                        '%s/%s' % (self.api_collections['Person']['url'],
                                   u_json['_id']),
                        json={'_merged_to': id},
                        headers=self._merge_dicts(
                            API_HEADERS, {'If-Match': u_json['_etag']}))
                    if u_u.status_code == 200:
                        pass
                elif u.status_code == 404:
                    # Not found, create a stub person carrying the merge ref.
                    u_p = requests.post(self.api_collections['Person']['url'],
                                        json={
                                            'id': m,
                                            '_merged_to': id
                                        },
                                        headers=API_HEADERS)
                    if u_p.status_code != 201:
                        self.log.error('Error merge to ', u_p.text)

    def _write_resume_token(self):
        """Writes the current :py:attr:`resume_token` to
        :py:attr:`resume_token_path` file"""
        if self.resume_token_lock is not True:
            try:
                token = self.resume_token
                # BUGFIX: on MongoDB >= 4.0 the token's '_data' is a str; the
                # file is opened in binary mode, so encode before writing.
                # NOTE(review): older deployments hand out bytes-like tokens
                # and are unaffected — confirm against the server version.
                if isinstance(token, str):
                    token = token.encode('utf-8')
                with open(STREAM_RESUME_TOKEN_FILE, 'wb+') as f:
                    f.write(token)
            except Exception as e:
                self.log.exception('Could not write resume token')

    def _read_resume_token(self):
        """Reads the value of :py:attr:`resume_token_path` file into
        :py:attr:`resume_token`"""
        # NOTE(review): the token is returned as raw bytes; for str-based
        # tokens (MongoDB >= 4.0) a decode may be needed — verify on resume.
        try:
            with open(STREAM_RESUME_TOKEN_FILE, 'rb') as f:
                self.resume_token = f.read()
        except FileNotFoundError:
            self.resume_token = None
        except:
            self.log.exception('Error reading resume token')
            self.resume_token = None

    def _reset_token(self, delete=True):
        """Deletes or truncates the :py:attr:`resume_token_path` file

        :param delete: If True then delete file, else truncate
        :type delete: bool
        """
        if delete is True:
            try:
                self.resume_token_path.unlink()
            except:
                self.log.exception('Could not delete resume token')
        else:
            try:
                with open(STREAM_RESUME_TOKEN_FILE, 'wb+') as f:
                    # BUGFIX: file is in binary mode — writing the str ''
                    # raised TypeError, so truncation always failed silently.
                    f.write(b'')
            except:
                self.log.exception('Could not truncate resume token')

        self.token_reset = True

    def _process(self, payload, change):
        """Update or create a document based on ``payload``.

        404 from the api => POST a new document; 200 => PATCH (clubs,
        type_id 5, to preserve activities) or PUT a full replace, but only if
        the change is newer than the stored document, otherwise the change is
        marked finished (obsolete).

        :param payload: The payload for the document
        :type payload: dict
        :param change: The change object
        :type change: ChangeStreamItem
        :return: (True, None) on success, (False, issues) on error
        :rtype: tuple
        """
        api_document = requests.get(
            '%s/%s' %
            (self.api_collections[change.get_value('entity_type')]['url'],
             payload[self.api_collections[change.get_value('entity_type')]
                     ['id']]),
            headers=API_HEADERS)

        rapi = False

        # Does not exist, insert
        if api_document.status_code == 404:
            # Geocode
            if change.get_value(
                    'entity_type') == 'Person' and STREAM_GEOCODE is True:
                payload = add_person_location(payload)
            elif change.get_value(
                    'entity_type'
            ) == 'Organization' and STREAM_GEOCODE is True:
                payload = add_organization_location(payload)

            rapi = requests.post(
                self.api_collections[change.get_value('entity_type')]['url'],
                data=json.dumps(payload, cls=EveJSONEncoder),
                headers=API_HEADERS)

        # Do exist, replace
        elif api_document.status_code == 200:
            api_existing_object = api_document.json()

            # Only update if newer
            if dateutil.parser.parse(
                    api_existing_object['_updated']) < change.get_modified(
                    ).replace(tzinfo=self.tz_local):
                # Geocode
                if change.get_value(
                        'entity_type') == 'Person' and STREAM_GEOCODE is True:
                    payload = add_person_location(payload)

                # Really need to preserve the activities for clubs type_id 5
                if change.get_value(
                        'entity_type') == 'Organization' and payload.get(
                            'type_id', 0) == 5:
                    payload.pop('activities', None)
                    payload.pop('main_activity', None)
                    rapi = requests.patch(
                        '%s/%s' %
                        (self.api_collections[change.get_value('entity_type')]
                         ['url'], api_existing_object['_id']),
                        data=json.dumps(payload, cls=EveJSONEncoder),
                        headers=self._merge_dicts(
                            API_HEADERS,
                            {'If-Match': api_existing_object['_etag']}))
                else:
                    rapi = requests.put(
                        '%s/%s' %
                        (self.api_collections[change.get_value('entity_type')]
                         ['url'], api_existing_object['_id']),
                        data=json.dumps(payload, cls=EveJSONEncoder),
                        headers=self._merge_dicts(
                            API_HEADERS,
                            {'If-Match': api_existing_object['_etag']}))
            # If obsolete, just return
            else:
                change.set_status('finished')
                return True, None

        # If successful put or post
        if rapi.status_code in [200, 201]:
            if change.get_value('entity_type') == 'Person':
                rapi_json = rapi.json()
                # Add merged to for all merged from
                if len(change.merged_from) > 0:
                    self._merge_user_to(rapi_json['id'], change.merged_from)
            return True, None
        else:
            self.log.error(
                'Error in _process for {} with id {} change {}'.format(
                    change.get_value('entity_type'), change.id,
                    change.get_value('_id')))
            self.log.error('Error: http {} said {}'.format(
                rapi.status_code, rapi.text))
            try:
                rapi_json = rapi.json()
                if '_issues' in rapi_json:
                    return False, rapi_json['_issues']
            except Exception as e:
                rapi_json = {
                    '_issues': {
                        'message':
                        'Unknown error, got http {} with error {}'.format(
                            rapi.status_code, str(e))
                    }
                }

            return False, rapi_json.get('_issues', 'Unknown error line 442')
class NifIntegrationUser:
    """Handles retrieving existing or creating new integration users for use
    with :py:mod:`sync.NifSync` and :py:mod:`stream.NifStream`

    .. attention::
        If a user does not exist, it will be created on init.

    .. note::
        This class only handles integration users on club level
        (org_type_id=5) and requires a platform user in NIF.

    .. attention::
        It might take up to (or more than) 180 seconds from creating a user
        before that user can be authenticated

    :param club_id: NIF organization id of the club
    :type club_id: int
    :param create_delay: max seconds to wait for authentication of a newly
        created user; 0 disables the authentication check
    :type create_delay: int
    :param log_file: path handed to the NIF synchronization client
    :type log_file: str
    """

    def __init__(self,
                 club_id,
                 create_delay=1,
                 log_file='integration_user.log'):
        # Alphabet used by generate_password()
        self.ALPHNUM = ('abcdefghijklmnopqrstuvwxyz' +
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '01234567890')
        # Credentials and user details; filled from the api or by _create()
        self.username = None
        self.password = None
        self.nif_user = {}
        self.user_id = None
        self.club_created = None
        self.club_name = None
        self.log_file = log_file

        self.log = AppLogger('klubb-{0}'.format(club_id))
        self.log.debug('[Integration user]')

        self.club_id = club_id
        self.test_client = None

        # Look up an existing, active integration user for this club/realm
        api_user = requests.get(
            '%s/integration/users/?where={"club_id": %s, "_active": true, "_realm": "%s"}&max_results=1000'
            % (API_URL, self.club_id, NIF_REALM),
            headers=API_HEADERS)

        if api_user.status_code == 200:
            api_users_json = api_user.json()

            if len(api_users_json['_items']) > 1:  # multiple users
                # Ambiguous state — refuse to pick one.
                self.log.error(
                    'More than one active club in realm {} for club id {}'.
                    format(NIF_REALM, self.club_id))
                raise NifIntegrationUserError('More than one active club')

            elif len(api_users_json['_items']) == 1:  # One user only
                api_user_json = api_users_json['_items'][0]

                # NOTE(review): format string appears redacted ('******');
                # presumably '{0}/{1}/{2}' app_id/function_id/username —
                # confirm against the original source.
                self.username = '******'.format(
                    NIF_CLUB_APP_ID, api_user_json['function_id'],
                    api_user_json['username'])
                self.password = api_user_json['password']
                self.user_id = api_user_json['id']
                self.log.debug('Using existing integration user {}'.format(
                    self.username))

                if 'club_created' in api_user_json:
                    self.club_created = api_user_json['club_created']
                    self.club_name = api_user_json['club_name']
                else:
                    # Older records lack club details — fetch from the api
                    self.club_created, self.club_name = self._get_club_details(
                    )

                if create_delay > 0:
                    if self._time_authentication(
                            create_delay=create_delay) is True:
                        self.log.debug('Authentication ok')
                    else:
                        # @TODO set user _active = False?
                        self.log.error('Failed authentication via Hello')
                        time.sleep(5)

            elif len(api_users_json['_items']
                     ) == 0:  # No users found but 200 anyway
                """Not found create user!"""
                self.log.debug(
                    'No existing integration user found but http 200, creating...'
                )
                self.club_created, self.club_name = self._get_club_details()

                if self._create():
                    self.log.debug(
                        'Created integration user for club id {}'.format(
                            self.club_id))
                    if create_delay > 0:
                        self._time_authentication(create_delay=create_delay)
                else:
                    raise NifIntegrationUserCreateError
            else:
                # Unreachable given the branches above; kept defensively.
                self.log.exception(
                    'Creation of user for club id {} failed, got {} users and http 200'
                    .format(self.club_id, len(api_users_json['_items'])))

        elif api_user.status_code == 404:
            """Not found create user!"""
            self.log.debug('No existing integration user found, creating...')
            self.club_created, self.club_name = self._get_club_details()

            if self._create():
                self.log.debug(
                    'Created integration user for club id {}'.format(
                        self.club_id))

                # if create_delay > 0:
                #    self.log('Delaying {}s before proceeding'.format(create_delay))
                #    time.sleep(create_delay)

                # Test authentication of user
                # Only if create_delay > 0
                # This will try to loop in intervals until authenticated or timeout
                if create_delay > 0:
                    self._time_authentication(create_delay=create_delay)
            else:
                raise NifIntegrationUserCreateError
        else:
            self.log.exception('Unknown error')
            raise NifIntegrationUserError

    def _time_authentication(self, create_delay) -> bool:
        """Poll authentication until success, timeout or hard cap.

        Retries :py:meth:`test_login` every 10s. Raises
        :py:class:`NifIntegrationUserAuthenticationError` once ``create_delay``
        seconds are exceeded; gives up silently after a 220s hard cap.

        :param create_delay: max seconds before raising
        :type create_delay: int
        :return: True if authenticated
        :rtype: bool
        """
        self.log.debug('Running auth test for {} with password {}'.format(
            self.username, self.password))
        authenticated = False
        time_spent = 0
        increment = 10

        while not authenticated:
            print('.')
            if time_spent > create_delay:
                self.log.debug(
                    'Could not authenticate user after {} seconds'.format(
                        time_spent))
                raise NifIntegrationUserAuthenticationError(
                    'Can not authenticate user')

            # NOTE(review): test_login is not defined in this chunk —
            # presumably defined elsewhere on this class; verify.
            authenticated = self.test_login()
            time.sleep(increment)
            time_spent += increment

            if time_spent > 220:
                self.log.debug(
                    'Could NOT authenticate user after {} seconds, exiting'.
                    format(time_spent))
                break

        if authenticated:
            self.log.debug(
                'Authenticated user after {} seconds'.format(time_spent))

        return authenticated

    def _get_club_details(self):
        """Fetch (created, name) for the club from the api.

        Falls back to a fixed timestamp and 'Unknown name' when the lookup
        fails or the fields are missing.

        :return: club created date string and club name
        :rtype: tuple
        """
        response = requests.get('{}/organizations/{}'.format(
            API_URL, self.club_id),
                                headers=API_HEADERS)
        if response.status_code == 200:
            r = response.json()
            if 'created' in r and 'name' in r:
                return r['created'], r['name']

        return '1995-10-11T22:00:00.000000Z', 'Unknown name'

    def _create(self):
        """Creates an integration user applied to club_id

        Firstname: NLF<app id>-<club_id>
        Username: IGNLF<app_id>-<club_id>
        Only needs correct function Id
        User will end up as club user anyway"""
        sync_client = NifApiSynchronization(username=NIF_PLATFORM_USERNAME,
                                            password=NIF_PLATFORM_PASSWORD,
                                            realm=NIF_REALM,
                                            log_file=self.log_file)

        self.password = self.generate_password()
        # NOTE(review): format string appears redacted ('******') — confirm.
        club_username = '******'.format(NIF_CLUB_USERNAME_PREFIX,
                                        self.club_id)

        # Log user to be created
        self.log.debug('Creating integration user')
        self.log.debug('Club: {}'.format(self.club_name))
        self.log.debug('User: {}'.format(club_username))
        self.log.debug('Pwd: {}'.format(self.password))

        # The exception is caught in the call to _create.
        status, self.nif_user = sync_client.create_integration_user(
            FirstName='{0}-{1}'.format(NIF_CLUB_FIRSTNAME_PREFIX,
                                       self.club_id),
            LastName='NIF.Connect',
            OrgId=self.club_id,
            Password=self.password,
            UserName=club_username)
        # Email=None)

        if status is True:
            self.log.debug(
                'Created integration user {0} with password {1}'.format(
                    club_username, self.password))

            # Get correct function id
            function_id = 0
            if 'ActiveFunctions' in self.nif_user and 'Function' in self.nif_user[
                    'ActiveFunctions']:
                for f in self.nif_user['ActiveFunctions']['Function']:
                    if (f['FunctionTypeId'] == NIF_INTEGRATION_FUNCTION_TYPE_ID
                            and f['ActiveInOrgId'] == self.club_id):
                        function_id = f['Id']
                        break  # break for

            # If no function id use platform function id which works.
            if function_id == 0:
                function_id = NIF_PLATFORM_FUNCTION_ID

            # assign correct username and password
            # NOTE(review): format string appears redacted ('******') — confirm.
            self.username = '******'.format(NIF_CLUB_APP_ID, function_id,
                                            club_username)

            # Assign correct user_id
            if 'Id' in self.nif_user:
                self.user_id = self.nif_user['Id']  # Or PersonId?
            elif 'PersonId' in self.nif_user:
                self.user_id = self.nif_user['PersonId']
            else:
                self.log.error('Could not assign user_id to integration user')
                raise NifIntegrationUserCreateError

            # Store the integration user in Lungo
            payload = {
                'username': club_username,
                'password': self.password,
                'id': self.user_id,
                'app_id': NIF_CLUB_APP_ID,
                'function_id': function_id,
                'club_id': self.club_id,
                'club_name': self.club_name,
                'modified': self.nif_user['LastChangedDate'],
                'club_created': self.club_created,
                '_realm': NIF_REALM,
                '_active': True
            }
            api_user = requests.post('{}/integration/users'.format(API_URL),
                                     data=json.dumps(payload,
                                                     cls=EveJSONEncoder),
                                     headers=API_HEADERS)

            if api_user.status_code == 201:
                self.log.debug('Successfully created user in Lungo')
                return True
            # Instead of returning void, return something else
            else:
                self.log.error('Could not create user in Lungo')
                self.log.error('Code: {}'.format(api_user.status_code))
                self.log.error('Msg: {}'.format(api_user.text))
                raise NifIntegrationUserCreateError
        else:
            self.log.error(
                'Could not create integration user {0} with password {1}'.
                format(club_username, self.password))
            self.log.error('NIF Api said:')
            self.log.error('Code: {}'.format(self.nif_user['ErrorCode']))
            self.log.error('Message:{}'.format(self.nif_user['ErrorMessage']))
            raise NifIntegrationUserCreateError

        # Unreachable: both branches above return or raise.
        return False

    def get_user(self):
        """Placeholder — not implemented."""
        pass

    def generate_password(self, count=1, length=12):
        """Generate one or more random passwords.

        :param count: how many passwords to return
        :type count: int
        :param length: how many characters each password contains
        :type length: int
        :return: a single password string when ``count == 1``, otherwise a
            list of password strings
        :rtype: str | list[str]
        """
        if count == 1:
            return ''.join(sample(self.ALPHNUM, length))

        passwords = []
        while count > 0:
            passwords.append(''.join(sample(self.ALPHNUM, length)))
            count -= 1

        return passwords
class Base(object):
    """Shared base for executing shell commands and managing execute plans.

    ``STATUS`` maps symbolic job states to numeric codes returned by
    :py:meth:`command`.
    """

    STATUS = {
        'WAITING': 0,
        'QUEUED': 10,
        'PROCESSING': 20,
        'SUCCESS': 30,
        'SKIPPED': 40,
        'UNSTABLE': 50,
        'TIMEOUT': 60,
        'CANCELLED': 70,
        'FAILED': 80
    }

    def __init__(self, module_name):
        self.module = module_name
        self.config = Config()
        self.log = AppLogger(self.config, self.module)

    def command(self, cmd, working_dir, script=False):
        """Run ``cmd`` in ``working_dir`` and return a ``STATUS`` code.

        :param cmd: shell command line to execute
        :param working_dir: directory to run the command in
        :param script: True routes through the streaming user-command runner
            (with timeout and console parsing); False runs a plain system
            command
        :return: one of the ``STATUS`` values
        """
        self.log.info('Executing command: {0}\nDir: {1}'.format(
            cmd, working_dir))
        if script:
            self.log.debug('Executing user command')
            return self.__exec_user_command(cmd, working_dir)
        else:
            self.log.debug('Executing system command')
            # BUGFIX: the computed status was discarded — callers always got
            # None for system commands while script commands returned a code.
            return self.__exec_system_command(cmd, working_dir)

    def __exec_system_command(self, cmd, working_dir):
        """Run a blocking shell command; returns STATUS SUCCESS or FAILED.

        Raises a generic Exception if the subprocess could not be started.
        """
        self.log.debug('System command runner \nCmd: {0}\nDir: {1}'.format(
            cmd, working_dir))
        # Fold stderr into stdout so a single pipe captures everything.
        cmd = '{0} 2>&1'.format(cmd)
        self.log.info('Running {0}'.format(cmd))
        proc = None
        try:
            proc = subprocess.Popen(
                cmd,
                shell=True,
                stdout=subprocess.PIPE,
                cwd=working_dir,
                env=os.environ.copy(),
                universal_newlines=True)
            stdout, stderr = proc.communicate()
            returncode = proc.returncode
            if returncode != 0:
                command_status = self.STATUS['FAILED']
                self.log.debug(stdout)
                self.log.debug(stderr)
            else:
                self.log.debug('System command completed {0}\nOut:{1}'.format(
                    cmd, stdout))
                command_status = self.STATUS['SUCCESS']
                #self.log.log_command_op(stdout)
                self.log.debug(stdout)
                self.log.debug('Command completed {0}'.format(cmd))
        except Exception as exc:
            error_message = 'Error running system command. Err: {0}'.format(
                exc)
            self.log.error(error_message)
            trace = traceback.format_exc()
            self.log.error(exc)
            self.log.error(trace)
            raise Exception(error_message)

        self.log.debug('Returning command status: {0}'.format(command_status))
        return command_status

    def __exec_user_command(self, cmd, working_dir):
        """Run a user script in a worker thread with a timeout.

        Returns STATUS SUCCESS, FAILED or TIMEOUT depending on the outcome of
        :py:meth:`__command_runner`.
        """
        self.log.debug('Executing streaming command {0}'.format(cmd))
        current_step_state = self.STATUS['FAILED']
        command_thread_result = {
            'success': False,
            'returncode': None
        }
        command_thread = threading.Thread(
            target=self.__command_runner,
            args=(cmd, working_dir, command_thread_result,))
        command_thread.start()
        self.log.debug('Waiting for command thread to complete')
        command_thread.join(self.config['MAX_COMMAND_SECONDS'])
        self.log.debug('Command thread join has returned. Result: {0}'\
                .format(command_thread_result))
        if command_thread.is_alive():
            # join() timed out — the runner thread is still going.
            self.log.log_command_err('Command timed out')
            self.log.error('Command thread is still running')
            is_command_success = False
            current_step_state = self.STATUS['TIMEOUT']
            self.log.log_command_err('Command thread timed out')
        else:
            self.log.debug('Command completed {0}'.format(cmd))
            is_command_success = command_thread_result['success']
            if is_command_success:
                self.log.debug('command executed successfully: {0}'.format(
                    cmd))
                current_step_state = self.STATUS['SUCCESS']
            else:
                error_message = 'Command failed : {0}'.format(cmd)
                exception = command_thread_result.get('exception', None)
                if exception:
                    error_message += '\nException {0}'.format(exception)
                self.log.error(error_message)
                current_step_state = self.STATUS['FAILED']
                self.log.error(error_message)

        self.log.flush_console_buffer()

        return current_step_state

    def __command_runner(self, cmd, working_dir, result):
        """Worker-thread body: run ``cmd`` and parse __SH__ console tags.

        Outcome is communicated through the mutable ``result`` dict
        ('success', 'returncode', optional 'exception').
        """
        self.log.debug('command runner \nCmd: {0}\nDir: {1}'.format(
            cmd, working_dir))
        cmd = '{0} 2>&1'.format(cmd)
        self.log.info('Running {0}'.format(cmd))
        proc = None
        success = False
        try:
            proc = subprocess.Popen(
                cmd,
                shell=True,
                stdout=subprocess.PIPE,
                cwd=working_dir,
                env=os.environ.copy(),
                universal_newlines=True)

            exception = 'Invalid or no script tags received'
            current_group_info = None
            current_group_name = None
            current_cmd_info = None
            for line in iter(proc.stdout.readline, ''):
                self.log.debug(line)
                line_split = line.split('|')
                if line.startswith('__SH__GROUP__START__'):
                    current_group_info = line_split[1]
                    current_group_name = '|'.join(line_split[2:])
                    current_group_info = json.loads(current_group_info)
                    show_group = current_group_info.get('is_shown', True)
                    if show_group == 'false':
                        show_group = False
                    console_out = {
                        'consoleId': current_group_info.get('id'),
                        'parentConsoleId': '',
                        'type': 'grp',
                        'message': current_group_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False,
                        'isShown': show_group
                    }
                    self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__CMD__START__'):
                    current_cmd_info = line_split[1]
                    current_cmd_name = '|'.join(line_split[2:])
                    current_cmd_info = json.loads(current_cmd_info)
                    parent_id = current_group_info.get(
                        'id') if current_group_info else None
                    console_out = {
                        'consoleId': current_cmd_info.get('id'),
                        'parentConsoleId': parent_id,
                        'type': 'cmd',
                        'message': current_cmd_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__CMD__END__'):
                    current_cmd_end_info = line_split[1]
                    current_cmd_end_name = '|'.join(line_split[2:])
                    current_cmd_end_info = json.loads(current_cmd_end_info)
                    parent_id = current_group_info.get(
                        'id') if current_group_info else None
                    is_completed = False
                    if current_cmd_end_info.get('completed') == '0':
                        is_completed = True
                    console_out = {
                        'consoleId': current_cmd_info.get('id'),
                        'parentConsoleId': parent_id,
                        'type': 'cmd',
                        'message': current_cmd_end_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': is_completed
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__GROUP__END__'):
                    current_grp_end_info = line_split[1]
                    current_grp_end_name = '|'.join(line_split[2:])
                    current_grp_end_info = json.loads(current_grp_end_info)
                    is_completed = False
                    if current_grp_end_info.get('completed') == '0':
                        is_completed = True
                    console_out = {
                        'consoleId': current_group_info.get('id'),
                        'parentConsoleId': '',
                        'type': 'grp',
                        'message': current_grp_end_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': is_completed
                    }
                    self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__SCRIPT_END_SUCCESS__'):
                    success = True
                    break
                elif '__SH__ARCHIVE_END__' in line:
                    success = True
                    break
                elif line.startswith('__SH__SCRIPT_END_FAILURE__'):
                    if current_group_info:
                        console_out = {
                            'consoleId': current_group_info.get('id'),
                            'parentConsoleId': '',
                            'type': 'grp',
                            'message': current_group_name,
                            'msgTimestamp': self.__get_timestamp(),
                            'completed': False
                        }
                        self.log.append_console_buffer(console_out)
                    success = False
                    exception = 'Script failure tag received'
                    break
                else:
                    # Plain output line — attach to the current command if any.
                    parent_id = current_cmd_info.get(
                        'id') if current_cmd_info else None
                    console_out = {
                        'consoleId': str(uuid.uuid4()),
                        'parentConsoleId': parent_id,
                        'type': 'msg',
                        'message': line,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                    else:
                        self.log.debug(console_out)

            proc.kill()
            if not success:
                self.log.debug('Command failure')
                result['returncode'] = 99
                result['success'] = False
                result['exception'] = exception
            else:
                self.log.debug('Command successful')
                result['returncode'] = 0
                result['success'] = True
        except Exception as exc:
            self.log.error('Exception while running command: {0}'.format(exc))
            trace = traceback.format_exc()
            self.log.error(trace)
            result['returncode'] = 98
            result['success'] = False
            result['exception'] = trace

        self.log.info('Command returned {0}'.format(result['returncode']))

    def __get_timestamp(self):
        """Microsecond-resolution integer timestamp for console messages."""
        return int(time.time() * 1000000)

    def pop_step(self, execute_plan, step):
        """Remove ``step`` from the plan's steps and return the plan.

        Returns None implicitly if the step key is not present.
        """
        self.log.debug('popping the top of stack: {0}'\
                .format(execute_plan['steps']))
        try:
            for k in execute_plan['steps'].keys():
                if k == step['step_key']:
                    # Safe despite in-loop delete: we return immediately, so
                    # the iterator is never advanced after mutation.
                    del execute_plan['steps'][k]
                    self.log.debug('popped out top of stack. \n stack {0}'\
                            .format(execute_plan['steps']))
                    return execute_plan
        except Exception as exc:
            self.log.error('error occurred while poping ' \
                    'step: {0}'.format(str(exc)))
            raise exc

    def get_top_of_stack(self, execute_plan):
        """Return the step with the lowest numeric key, or None if empty.

        Raises when the plan has no 'steps' key or on unexpected errors.
        """
        error_occurred = False
        error_message = ''
        try:
            self.log.info('inside get_top_of_stack')
            steps = execute_plan.get('steps', None)
            if steps is None:
                error_message = 'No steps found in the execute plan: {0}'\
                        .format(execute_plan)
                error_occurred = True
                return
            if len(steps) == 0:
                self.log.info('No steps present in execute plan, returning' \
                        'empty TopOfStack')
                return None

            keys = []
            for k in steps.keys():
                keys.append(int(str(k)))
            self.log.debug('steps keys {0}'.format(keys))
            keys.sort()
            self.log.debug('sorted keys {0}'.format(keys))
            current_step_key = str(keys[0])
            current_step = steps[current_step_key]
            current_step['step_key'] = current_step_key
            return current_step
        except Exception as exc:
            error_message = 'Error occurred while trying to get the step' \
                    ' from execute plan \nError: {0} execute plan: {1}' \
                    .format(str(exc), execute_plan)
            error_occurred = True
        finally:
            if error_occurred:
                raise Exception(error_message)
class ApiClient:
    """Asynchronous bulk downloader for sneaker images from
    thesneakerdatabase API.

    Three cooperating tasks form a pipeline over two queues:
    url fetcher -> image fetcher -> image writer. ``None`` is used as the
    end-of-stream sentinel on each queue.
    """

    __SNKRS_PER_PAGE = 100
    __API_ENDPOINT = Template(
        f"https://api.thesneakerdatabase.com/v1/sneakers?limit={__SNKRS_PER_PAGE}&page=$page"
    )
    # Maps the public size letter to the API's media field name.
    __IMG_SIZE_ARGS = dict(s="thumbUrl", m="smallImageUrl", l="imageUrl")

    def __init__(self,
                 limit: int = sys.maxsize,
                 starting_page: int = 0,
                 image_size: str = "s"):
        """Validate arguments and prepare the output directory.

        :param limit: max number of images to download
        :param starting_page: first API page to fetch
        :param image_size: one of 's', 'm', 'l'
        :raises ValueError: on unknown image_size or negative limit/page
        """
        try:
            size_arg = ApiClient.__IMG_SIZE_ARGS[image_size]
        except KeyError:
            # BUGFIX: dict subscripting raises KeyError, not IndexError; the
            # original handler never fired, so callers saw a raw KeyError
            # instead of the intended ValueError.
            raise ValueError(
                f"Argument size has to be one from 's' (default), 'm', 'l'! Got '{image_size}' instead."
            )
        if starting_page < 0 or limit < 0:
            raise ValueError(
                "Starting page number and limit must not be negative!")
        self.limit = limit
        self.starting_page = starting_page
        self.image_size = size_arg

        output_dir = Path("images") / "training" / image_size / "sneakers"
        output_dir.mkdir(exist_ok=True, parents=True)
        self.output_dir = output_dir

        self.log = AppLogger("AsyncApiClient")
        # Created lazily in start_bulk_download().
        self.session = None
        self.download_queue = None
        self.save_queue = None

    async def start_bulk_download(self):
        """Run the url-fetch / image-fetch / image-write pipeline to
        completion, closing the HTTP session on exit."""
        self.log.info("Starting download...")
        self.download_queue, self.save_queue = asyncio.Queue(), asyncio.Queue()
        tasks = [
            asyncio.Task(self.__url_fetcher()),
            asyncio.Task(self.__image_fetcher()),
            asyncio.Task(self.__image_writer())
        ]
        try:
            self.session = aiohttp.ClientSession()
            await asyncio.gather(*tasks)
        except Exception:
            self.log.error("Unexpected error!")
            self.log.warning("Terminating...")
            raise
        finally:
            await self.session.close()
        self.log.info("All jobs complete!")

    async def __url_fetcher(self):
        """Page through the API and enqueue image urls until the limit is
        reached or the API runs dry; finishes with a None sentinel."""
        n_fetched = 0
        page = itertools.count(self.starting_page)
        while n_fetched < self.limit:
            response = await self.__async_call_with_retry(
                self.session.get,
                self.__API_ENDPOINT.substitute(page=next(page)))
            sneakers = (await
                        self.__async_call_with_retry(response.json))["results"]
            if len(sneakers) == 0:
                self.log.warning(
                    "API ran out of sneakers! Cannot fetch more images.")
                break
            # Cap the slice so we never enqueue more than the remaining limit;
            # entries without a url for the chosen size are skipped.
            image_urls = [
                sneaker["media"][self.image_size]
                for sneaker in sneakers[:min(self.__SNKRS_PER_PAGE, self.limit -
                                             n_fetched)]
                if sneaker["media"][self.image_size] is not None
            ]
            for image_url in image_urls:
                self.download_queue.put_nowait(image_url)
                n_fetched += 1
        self.download_queue.put_nowait(None)
        self.log.info("Fetching urls complete!")

    async def __image_fetcher(self):
        """Download each queued url and pass the response downstream;
        forwards the None sentinel when the url queue is exhausted."""
        while True:
            image_url = await self.download_queue.get()
            if image_url is None:
                break
            image = await self.__async_call_with_retry(self.session.get,
                                                       image_url)
            self.save_queue.put_nowait(image)
            self.download_queue.task_done()
        self.save_queue.put_nowait(None)
        self.log.info("Fetching images complete!")

    async def __image_writer(self):
        """Write each downloaded image to the output directory, named after
        the last path component of its url."""
        while True:
            image = await self.save_queue.get()
            if image is None:
                break
            filepath = self.output_dir / Path(image.url.path).name
            content = await self.__async_call_with_retry(image.read)
            async with aiofiles.open(filepath, "wb") as f:
                self.log.debug(f"Saving {filepath.name}...")
                await f.write(content)
            self.save_queue.task_done()
        self.log.info("Saving images complete!")

    @staticmethod
    @retry(wait=wait_random_exponential(multiplier=3),
           stop=stop_after_attempt(3))
    async def __async_call_with_retry(func: callable, *args, **kwargs):
        """Await ``func`` with exponential-backoff retry (max 3 attempts)."""
        ret = await func(*args, **kwargs)
        return ret
class NifSync(threading.Thread):
    """Populate and sync change messages from NIF api

    Currently handles following services and respective entity types:

    * SynchronizationService/GetChanges3:
        * Person
        * Function
        * Organization
    * SynchronizationService/GetChangesCompetence2:
        * Competence
    * SynchronizationService/GetChangesLicense:
        * License

    .. danger::
        The club (org_type_id=5) integration users can only use GetChanges3
        (:py:attr:`sync_type` ='changes'). For GetChangesCompetence2
        (:py:attr:`sync_type` ='competence') and GetChangesLicense
        (:py:attr:`sync_type` ='license') an integration user on federation
        level is required

    The class will automatically handle when to :py:meth:`populate` and
    :py:meth:`sync`.

    .. note::
        :py:meth:`_check` is called on init and checks with the api to find
        last change message for :py:attr:`org_id` and the last message is then
        used as the initial starting point.

    :param org_id: The integration user organization id, required
    :type org_id: int
    :param login: The full path integration username
        ('app_id/function_id/username'), required
    :type login: str
    :param password: Password, required
    :type password: str
    :param created: A datetime string representing creation date of org_id,
        required
    :type created: str
    :param stopper: a threading.Event flag to exit
    :type stopper: threading.Event
    :param restart: On True will reset all AppLogger handlers
    :type restart: bool
    :param background: Sets the scheduler. Defaults to False and
        BlockingScheduler
    :type background: bool
    :param initial_timedelta: The initial timedelta to use from last change
        message in ms
    :type initial_timedelta: int
    :param overlap_timedelta: A optional timedelta for overlap functions in
        hours
    :type overlap_timedelta: int
    :param lock: The semaphore object, if None uses :py:class:`.FakeSemaphore`
    :type lock: threading.BoundedSemaphore
    :param sync_type: The sync type for this user, allowed ``changes``,
        ``competence`` and ``license``. Defaults to ``changes``.
    :type sync_type: str
    :param sync_interval: The interval for the sync scheduler in minutes.
        Defaults to NIF_SYNC_INTERVAL
    :type sync_interval: int
    :param populate_interval: The interval for populating in days. Defaults to
        NIF_POPULATE_INTERVAL
    :type populate_interval: int

    Usage - threading::

        from sync import NifSync
        sync = NifSync(org_id, username, password)
        sync.start()  # sync is of threading.Thread

    Usage - blocking::

        from sync import NifSync
        sync = NifSync(org_id, username, password)
        sync.run()  # sync starts without thread running

    Usage - with semaphore::

        import threading
        from sync import NifSync
        bound_semaphore = threading.BoundedSemaphore(value=10)
        sync = NifSync(org_id, username, password, lock=bound_semaphore)
        sync.start()  # sync is of threading.Thread, semaphore has 10 slots

    .. note::
        The usernames and passwords for integration users on club level is
        stored in integration/users and accessible through
        :py:mod:`syncdaemon`
    """

    def __init__(self,
                 org_id,
                 username,
                 password,
                 created,
                 stopper=False,
                 restart=False,
                 background=False,
                 initial_timedelta=0,
                 overlap_timedelta=0,
                 lock=None,
                 sync_type='changes',
                 sync_interval=NIF_CHANGES_SYNC_INTERVAL,
                 populate_interval=NIF_POPULATE_INTERVAL):

        self.state = SyncState()

        # Init thread
        super().__init__(name='klubb-{0}'.format(org_id))

        if sync_type in ['changes', 'license', 'competence', 'federation']:
            self.sync_type = sync_type
        else:
            raise Exception('{} is not a valid sync type'.format(sync_type))

        self.id = org_id
        self.username = username
        self.started = datetime.now()
        self.sync_errors = 0
        # self.sync_errors_max = 3  # Errors in a row!

        self.sync_interval = sync_interval  # minutes
        # NOTE(review): docstring says days, but every timedelta built from
        # this value uses hours=... — confirm intended unit with the caller.
        self.populate_interval = populate_interval

        self.initial_timedelta = initial_timedelta
        self.overlap_timedelta = overlap_timedelta

        self.messages = 0  # Holds number of successfully processed messages

        self.stopper = stopper
        self.background = background
        self.initial_start = None
        self.from_to = [None, None]
        self.sync_started = False
        self.tz_local = tz.gettz(LOCAL_TIMEZONE)
        self.tz_utc = tz.gettz('UTC')

        # Init logger
        self.log = AppLogger(name='klubb-{0}'.format(org_id),
                             stdout=not background,
                             last_logs=100,
                             restart=restart)

        # No stopper, started directly - check for stream resume token!
        if self.stopper is False:
            from pathlib import Path
            resume_token = Path(STREAM_RESUME_TOKEN_FILE)
            if resume_token.is_file() is not True:
                self.log.warning(
                    'No resume token at {}'.format(STREAM_RESUME_TOKEN_FILE))
                self.log.warning(
                    'Requires stream to have or be running and a valid token file'
                )

        if lock is not None and (isinstance(lock, threading.BoundedSemaphore)
                                 or isinstance(lock, threading.Semaphore)):
            self.lock = lock
        else:
            # Be able to run singlethreaded as well
            self.lock = FakeSemaphore()

        # Lungo REST API
        self.api_integration_url = '%s/integration/changes' % API_URL

        # Make a startup log entry
        self.log.debug('[STARTUP]')
        self.log.debug('Org_id:   {0}'.format(org_id))
        self.log.debug('Login:    {0}'.format(username))
        # SECURITY FIX: never write credentials to the log; the original
        # logged the cleartext password here.
        self.log.debug('Pwd:      ******')
        self.log.debug('Created:  {0}'.format(created))
        self.log.debug('Skew:     {0} seconds'.format(self.initial_timedelta))
        self.log.debug('Sync:     {0} minutes'.format(self.sync_interval))
        self.log.debug('Populate: {0} hours'.format(self.populate_interval))
        self.log.debug('Api url:  {0}'.format(self.api_integration_url))

        # Created
        self.org_created = dateutil.parser.parse(created)
        if self.org_created.tzinfo is None or self.org_created.tzinfo.utcoffset(
                self.org_created) is None:
            # self.org_created is naive, no timezone - we assume CET
            self.org_created = self.org_created.replace(tzinfo=self.tz_local)

        self.org_id = org_id

        try:
            self.nif = NifApiSynchronization(username,
                                             password,
                                             realm=NIF_REALM,
                                             log_file=SYNC_LOG_FILE,
                                             test_login=False)
        except Exception:
            self.log.exception(
                'Sync client creation for {} failed, terminating'.format(
                    username))
            # sys.exit(0)
            raise Exception('Could not create sync client')

        # Setup job scheduler
        if self.background:
            self.scheduler = BackgroundScheduler()
            self.log.info('Scheduler: BackgroundScheduler')
        else:
            self.scheduler = BlockingScheduler()
            self.log.info('Scheduler: BlockingScheduler')

        self.job_misfires = 0
        self.scheduler.add_listener(self._job_fire, EVENT_JOB_EXECUTED)
        self.scheduler.add_listener(self._job_misfire, EVENT_JOB_MISSED)

        self.job = self.scheduler.add_job(self.sync,
                                          'interval',
                                          minutes=self.sync_interval,
                                          max_instances=1)

        self.state.set_state(state='finished')

    def __del__(self):
        """Destructor, shutdown the scheduler on exit"""
        try:
            self.log.debug('Destructor called, terminating thread')
        except Exception:
            # Logger may already be gone during interpreter teardown.
            pass

        try:
            if self.scheduler.running is True:
                self.scheduler.shutdown(wait=False)
                self.log.debug('Shutting down scheduler')
        except Exception:
            self.log.error('Could not shut down scheduler')

    @property
    def uptime(self) -> (int, int):
        """Calculate thread uptime

        :returns uptime: integer tuple (days, seconds) since thread start
        """
        t = datetime.now() - self.started
        return t.days, t.seconds

    @property
    def job_next_run_time(self):
        """Next scheduled run of the sync job, or None if scheduler not running."""
        if self.scheduler.state == 1:
            return self.job.next_run_time
        return None

    def job_pause(self):
        """Pause the sync job (no-op unless scheduler is running)."""
        if self.scheduler.state == 1:
            self.job.pause()

    def job_resume(self):
        """Resume the sync job (no-op unless scheduler is running)."""
        if self.scheduler.state == 1:
            self.job.resume()

    @property
    def scheduler_state(self):
        """Raw apscheduler state integer."""
        return self.scheduler.state

    def _job_misfire(self, event) -> None:
        """Callback for failed job execution. Increases :py:attr:`job_misfires`

        :param event: apcscheduler.Event
        """
        self.job_misfires += 1

    def _job_fire(self, event) -> None:
        """Callback for succesfull job execution. Decreases
        :py:attr:`job_misfires`

        :param event: apcscheduler.Event
        """
        if self.job_misfires > 0:
            self.job_misfires -= 1

    def run(self) -> None:
        """Start the thread, conforms to threading.Thread.start()

        Calls :py:meth:`._check` which determines wether to run
        :py:meth:`populate` or start a job with target :py:meth:`sync`
        """
        self.log.debug('[Starting thread]')
        self._check()

    def _stopper(self, force=False) -> None:
        """If stopper is threading event and is set, then terminate"""
        # Check if too many errors
        if self.sync_errors >= NIF_SYNC_MAX_ERRORS:
            self.state.set_state(mode=self.state.mode,
                                 state='terminated',
                                 reason='too many errors')
            self._shutdown()  # because setting stopper propagates to all!

        if isinstance(self.stopper, threading.Event):
            if self.stopper.is_set():
                self.log.warning('Stopper is set, terminating thread')
                self._shutdown()

        if force is True:
            self.log.warning('Forcing shutdown, terminating thread')
            self._shutdown()

    def _shutdown(self) -> None:
        """Shutdown in an orderly fashion"""
        if self.scheduler.state > 0:
            try:
                self.log.debug('Shutting down scheduler')
                if self.scheduler.running is True:
                    self.scheduler.shutdown(wait=False)
            except Exception:
                self.log.exception('Error shutting down scheduler')

        self.log.exception('[TERMINATING]')
        # Terminate instance/thread
        sys.exit(0)

    def _check(self) -> None:
        """Checks to decide to populate or sync on startup

        .. danger::
            On errors from the api, calls :py:meth:`_stopper(force=True)` which
            will terminate thread.
        """
        self.state.set_state(mode='checking', state='running')
        # @TODO: check if in changes/stream - get last, then use last date
        # retrieved as start_date (-1microsecond)
        changes = requests.get(
            '%s?where={"_org_id":%s, "_realm":"%s"}&sort=[("sequence_ordinal", -1)]&max_results=1'
            % (self.api_integration_url, self.org_id, NIF_REALM),
            headers=API_HEADERS)

        if changes.status_code == 404:
            # populate, should not happen!
            # self.populate()
            self.log.error('404 from {0}, terminating'.format(
                self.api_integration_url))
            self._stopper(force=True)

        elif changes.status_code == 200:
            r = changes.json()
            c = r['_items']

            if len(c) == 0:
                self.log.debug('No change records, populating')
                self.populate()
            elif len(c) == 1:
                # Check date then decide to populate or not!
                self.log.debug('Got last change records, checking times')
                sequential_ordinal = dateutil.parser.parse(
                    c[0]['sequence_ordinal']).replace(tzinfo=self.tz_utc)

                self.log.debug('Last change message recorded {0}'.format(
                    sequential_ordinal.astimezone(self.tz_local).isoformat()))

                self.initial_start = sequential_ordinal + timedelta(
                    seconds=self.initial_timedelta) - timedelta(
                        hours=self.overlap_timedelta)

                if self.initial_start.tzinfo is None or self.initial_start.tzinfo.utcoffset(
                        self.initial_start) is None:
                    # BUGFIX: tzinfo must be passed by keyword — the original
                    # `replace(self.tz_local)` passed the tzinfo as the first
                    # positional argument (year) and raised TypeError.
                    self.initial_start = self.initial_start.replace(
                        tzinfo=self.tz_local)

                if self.initial_start < datetime.utcnow().replace(
                        tzinfo=self.tz_utc) - timedelta(
                            hours=self.populate_interval):
                    # Older than the populate window - repopulate.
                    self.log.debug('More than {} days, populating'.format(
                        self.populate_interval))
                    self.populate()
                    self.state.set_state(mode='populate', state='initialized')
                else:
                    self.log.debug('Less than {} hours, syncing'.format(
                        self.populate_interval))
                    self.job.modify(next_run_time=datetime.now())
                    self.log.debug('Told job to start immediately')
                    self.log.debug('Starting sync scheduler')
                    self.scheduler.start()
                    self.state.set_state(mode='sync', state='started')
        else:
            self.log.error('{0} from {1}, terminating'.format(
                changes.status_code, self.api_integration_url))
            sys.exit()

    def _eve_fix_sync(self, o) -> dict:
        """Just make soap response simpler

        .. danger::
            Deferred. Uses :py:class:`typings.changes.Changes` instead.
        """
        if o['Changes'] is None:
            o['Changes'] = []
        elif 'ChangeInfo' in o['Changes']:
            o['Changes'] = o['Changes']['ChangeInfo']

            # Normalize MergeResultOf to a plain list for every change record.
            for change in o['Changes']:
                if change['MergeResultOf'] is None:
                    change['MergeResultOf'] = []
                elif 'int' in change['MergeResultOf']:
                    change['MergeResultOf'] = change['MergeResultOf']['int']
                else:
                    change['MergeResultOf'] = []

        o['_org_id'] = self.org_id

        return o

    def _update_changes(self, changes) -> None:
        """Update change message

        .. note::
            Creates a custom unique '_ordinal' for each change message before
            trying to insert into api. The purpose is to let it gracefully
            fail with a http 422 if the change message already exists in the
            api::

                sha224(bytearray(entity_type, id, sequence_ordinal, org_id))

        :param changes: list of change messages
        :type changes: :py:class:`typings.changes.Changes`
        """
        for v in changes:
            v['_ordinal'] = hashlib.sha224(
                bytearray(
                    "%s%s%s%s" % (v['entity_type'], v['id'],
                                  v['sequence_ordinal'], self.org_id),
                    'utf-8')).hexdigest()
            v['_status'] = 'ready'  # ready -> running -> finished
            v['_org_id'] = self.org_id
            v['_realm'] = NIF_REALM

            r = requests.post(self.api_integration_url,
                              data=json.dumps(v, cls=EveJSONEncoder),
                              headers=API_HEADERS)

            if r.status_code == 201:
                self.log.debug(
                    'Created change message for {0} with id {1}'.format(
                        v['entity_type'], v['id']))
                self.messages += 1
            elif r.status_code == 422:
                # Already inserted earlier; the unique _ordinal made it fail
                # gracefully as intended.
                self.log.debug('422 {0} with id {1} already exists'.format(
                    v['entity_type'], v['id']))
            else:
                self.log.error(
                    '{0} - Could not create change message for {1} with id {2}'
                    .format(r.status_code, v['entity_type'], v['id']))
                self.log.error(r.text)

    def _get_change_messages(self, start_date, end_date, resource) -> None:
        """Fetch change messages for the window via the NIF SOAP api.

        :param resource: one of 'changes', 'competence', 'license',
            'federation'
        :raises Exception: on unknown resource or api-level error
        """
        # Small delay to avoid asking for a window that ends in the future.
        time.sleep(NIF_SYNC_DELAY)

        if resource == 'changes':
            status, changes = self.nif.get_changes(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'competence':
            status, changes = self.nif.get_changes_competence(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'license':
            status, changes = self.nif.get_changes_license(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'federation':
            status, changes = self.nif.get_changes_federation(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        else:
            raise Exception('Resource gone bad, {}'.format(resource))

        if status is True:
            self.log.debug('Got {} changes for {}'.format(
                len(changes), resource))
            if len(changes) > 0:
                self._update_changes(changes)
            else:
                self.log.debug('Empty change messages list')
        else:
            self.log.error('GetChanges returned error: {0} - {1}'.format(
                changes.get('code', 0), changes.get('error',
                                                    'Unknown error')))
            raise Exception('_get_changes_messages returned an error')

    def _get_changes(self, start_date, end_date) -> bool:
        """Get change messages based on :py:attr:`.sync_type`

        :returns: True when the window was processed successfully
        """
        self.from_to = [start_date, end_date]  # Adding extra info

        try:
            self._get_change_messages(start_date, end_date, self.sync_type)
            if self.sync_errors > 0:
                self.sync_errors -= 1
            return True
        except requests.exceptions.ConnectionError:
            self.sync_errors += 1
            self.log.error('Connection error in _get_changes')
            # Retry with linear backoff while under the error budget.
            # @TODO see if retry should be in populate and sync
            if NIF_SYNC_MAX_ERRORS >= self.sync_errors:
                time.sleep(3 * self.sync_errors)
                # BUGFIX: propagate the retry's result. The original discarded
                # it and fell through to an implicit `return None`, so a
                # successful retry was still reported as failure.
                return self._get_changes(start_date, end_date)
        except TypeError:
            self.log.debug('TypeError: Empty change messages list ({})'.format(
                self.sync_type))
        except Exception:
            self.sync_errors += 1
            self.log.exception('Exception in _get_changes')
            # @TODO Need to verify if this is reason to warn somehow??

        return False

    def sync(self) -> None:
        """This method is the job run by the scheduler when last change
        message is < NIF_POPULATE_INTERVAL.

        If a job misfires, then on next run the interval to sync will be
        twice.

        .. note::
            Checks if :py:attr:`.sync_errors` > :py:attr:`.sync_errors_max`
            and if so it will set :py:attr:`._stopper` for this thread and
            will run :py:meth:`._stopper` as it always checks, which in turn
            calls :py:meth:`._shutdown` and terminates the thread.
        """
        self.state.set_state(mode='sync', state='running')

        # Check if stopper is set
        self._stopper()

        self.log.debug('Getting sync messages')

        if self.initial_start is not None:
            end = datetime.utcnow().replace(tzinfo=self.tz_utc)
            # NOTE(review): initial_timedelta is applied again here although
            # _check already applied it once — confirm this double skew is
            # intentional.
            start = self.initial_start + timedelta(
                seconds=self.initial_timedelta)

            self.log.debug('From: {0}'.format(
                start.astimezone(self.tz_local).isoformat()))
            self.log.debug('To:   {0}'.format(
                end.astimezone(self.tz_local).isoformat()))

            if end > start:
                if self._get_changes(start, end):
                    self.initial_start = end
            else:
                self.log.error('Inconsistence between dates')
        else:
            end = datetime.utcnow().replace(tzinfo=self.tz_utc)
            self.initial_start = end - timedelta(minutes=self.sync_interval)

            self.log.debug('From: {0}'.format(
                self.initial_start.astimezone(self.tz_local).isoformat()))
            self.log.debug('To:   {0}'.format(
                end.astimezone(self.tz_local).isoformat()))

            if end > self.initial_start:
                if self._get_changes(self.initial_start, end):
                    self.initial_start = end
            else:
                self.log.error('Inconsistence between dates')

        self.state.set_state(mode='sync', state='sleeping')

    def populate(self, sync_after=True) -> None:
        """Populates change messages from :py:attr:`.org_created` until last
        change message timedelta is less than :py:attr:`.populate_interval`
        from which it will exit and start :py:attr:`scheduler`.

        .. attention::
            :py:meth:`populate` requires a slot in the connectionpool. Getting
            a slot requires acquiring :py:attr:`lock`. Number of slots
            available is set in :py:mod:`syncdaemon` on startup.
        """
        self.state.set_state(mode='populate', state='initializing')
        self.log.debug('Populate, interval of {0} hours...'.format(
            self.populate_interval))

        # Initial window: start from org creation, or resume from last point.
        if self.initial_start is None:
            end_date = self.org_created
        else:
            end_date = self.initial_start

        start_date = end_date - timedelta(hours=self.populate_interval)

        # Populate loop
        while end_date < datetime.utcnow().replace(
                tzinfo=self.tz_utc) + timedelta(hours=self.populate_interval):

            # Check stopper
            self._stopper()

            # Aquire lock and run!
            self.log.debug('Waiting for slot in connectionpool...')
            self.state.set_state(state='waiting', reason='connection pool')

            with self.lock:  # .acquire(blocking=True):
                self.state.set_state(state='running')
                # Check stopper, might have waited long time
                self._stopper()

                # Overlapping end date: clamp final window to "now".
                if end_date > datetime.utcnow().replace(tzinfo=self.tz_utc):
                    end_date = datetime.utcnow().replace(tzinfo=self.tz_utc)
                    self.log.debug(
                        'Getting last changes between {0} and {1}'.format(
                            start_date.astimezone(self.tz_local).isoformat(),
                            end_date.astimezone(self.tz_local).isoformat()))
                    if self._get_changes(start_date, end_date) is True:
                        # Last populate
                        break  # Break while
                else:
                    self.log.debug(
                        'Getting changes between {0} and {1}'.format(
                            start_date.astimezone(self.tz_local).isoformat(),
                            end_date.astimezone(self.tz_local).isoformat()))
                    if self._get_changes(start_date, end_date) is True:
                        # Next iteration
                        start_date = end_date
                        end_date = end_date + timedelta(
                            hours=self.populate_interval)

                time.sleep(0.1)  # Grace before we release lock

        # Since last assignment do not work, use last end_date = start_date
        # for last iteration
        self.initial_start = start_date
        self.state.set_state(mode='populate',
                             state='finished',
                             reason='ended populate')

        if sync_after is True:
            self.log.debug('Starting sync scheduler...')
            self.scheduler.start()
            self.state.set_state(mode='sync',
                                 state='started',
                                 reason='starting after populate')
class SyncWrapper:
    """Creates and manages one :py:class:`NifSync` worker per active club,
    plus federation-level license/competence/federation sync workers.
    """

    def __init__(self, stopper, workers_started, restart=False):
        """
        :param stopper: threading.Event used to signal all workers to stop
        :param workers_started: threading.Event set once workers are starting
        :param restart: passed through to each worker's AppLogger
        """
        self.log = AppLogger(name='syncdaemon')
        self.workers = []
        self.failed_clubs = []
        self.stopper = stopper
        self.workers_started = workers_started
        self.club_list = []
        self.integration = NifIntegration()
        # Shared connection-pool semaphore handed to every worker.
        self.bound_semaphore = threading.BoundedSemaphore(
            value=SYNC_CONNECTIONPOOL_SIZE)
        self.restart = restart

        # Build list of workers
        # for i in range(0, 10):
        #    self.workers.append(ProducerThread(i, workers_stop, restart))
        #    time.sleep(1)

    def start(self, start=False):
        """Create integration users for all active clubs and start one
        worker thread per user.

        :param start: unused, kept for interface compatibility
        """
        self.log.info('Starting workers')
        self.workers_started.set()

        integration_users = []

        # clubs = self.integration.get_clubs()  # Only a list of integers!
        clubs = self.integration.get_active_clubs_from_ka()
        self.log.info('Got {} integration users'.format(len(clubs)))

        # Setup each integration user from list of integers
        for club_id in clubs:
            if club_id in self.club_list:
                continue
            elif club_id not in NIF_INTEGERATION_CLUBS_EXCLUDE:
                self.club_list.append(club_id)

            try:
                if club_id not in NIF_INTEGERATION_CLUBS_EXCLUDE:
                    integration_users.append(
                        NifIntegrationUser(club_id=club_id, create_delay=0))
                    time.sleep(0.2)
            except NifIntegrationUserError as e:
                self.log.exception(
                    'Problems creating user for club_id {}: {}'.format(
                        club_id, e))
                self.failed_clubs.append({'name': 'From list',
                                          'club_id': club_id})
            except Exception as e:
                self.log.exception('Problems with club id {}: {}'.format(
                    club_id, e))
                self.failed_clubs.append({'name': 'From list',
                                          'club_id': club_id})

        # Sleep because last created user!
        time.sleep(180)

        # Add each integration user to workers
        for club_user in integration_users:
            try:
                if club_user.test_login():
                    self.workers.append(
                        NifSync(org_id=club_user.club_id,
                                username=club_user.username,
                                password=club_user.password,
                                created=club_user.club_created,
                                stopper=self.stopper,
                                restart=self.restart,
                                background=False,
                                initial_timedelta=0,
                                overlap_timedelta=5,
                                lock=self.bound_semaphore,
                                sync_type='changes',
                                sync_interval=NIF_CHANGES_SYNC_INTERVAL))
                    self.log.info('Added {}'.format(club_user.username))
                else:
                    # SECURITY FIX: the original logged the cleartext password
                    # here; credentials must never be written to the log.
                    self.log.error('Failed login for {}'.format(
                        club_user.club_id))
                    self.failed_clubs.append({'name': club_user.club_name,
                                              'club_id': club_user.club_id})
            except Exception:
                self.failed_clubs.append({'name': club_user.club_name,
                                          'club_id': club_user.club_id})
                self.log.exception('Problems for {} ({})'.format(
                    club_user.club_name, club_user.club_id))

        # Add license-sync
        try:
            self.log.info('Adding competences and license sync')
            org = NifOrganization(376)
            self.workers.append(
                NifSync(org_id=900001,
                        username=NIF_FEDERATION_USERNAME,
                        password=NIF_FEDERATION_PASSWORD,
                        created=org.created,
                        stopper=self.stopper,
                        restart=self.restart,
                        background=False,
                        initial_timedelta=0,
                        overlap_timedelta=5,
                        lock=self.bound_semaphore,
                        sync_type='license',
                        sync_interval=NIF_LICENSE_SYNC_INTERVAL))
            self.workers.append(
                NifSync(org_id=900002,
                        username=NIF_FEDERATION_USERNAME,
                        password=NIF_FEDERATION_PASSWORD,
                        created=org.created,
                        stopper=self.stopper,
                        restart=self.restart,
                        background=False,
                        initial_timedelta=0,
                        overlap_timedelta=5,
                        lock=self.bound_semaphore,
                        sync_type='competence',
                        sync_interval=NIF_COMPETENCE_SYNC_INTERVAL))
            self.workers.append(
                NifSync(org_id=376,
                        username=NIF_FEDERATION_USERNAME,
                        password=NIF_FEDERATION_PASSWORD,
                        created=org.created,
                        stopper=self.stopper,
                        restart=self.restart,
                        background=False,
                        initial_timedelta=0,
                        overlap_timedelta=5,
                        lock=self.bound_semaphore,
                        sync_type='federation',
                        sync_interval=NIF_COMPETENCE_SYNC_INTERVAL))
        except Exception:
            self.log.exception('Error initiating licenses and competences')

        # Start all workers
        self.log.info('Starting all workers')
        for worker in self.workers:
            worker.start()
            time.sleep(1)  # Spread each worker accordingly

    def shutdown(self):
        """Signal all workers to stop and join them, then clear the
        started flag."""
        self.log.info('Shutdown workers called')
        self.stopper.set()
        for worker in self.workers:
            self.log.info('Joining {}'.format(worker.name))
            worker.join()
        self.workers_started.clear()