Example #1
class NifStream:
    """
    Processes change messages and inserts or updates the corresponding object in api.

    .. danger::
        Private members are also documented. :py:meth:`.run` and :py:meth:`.recover` are the only publicly
        available methods.

    Handles the following NIF entity_types:

    * Person        :py:class:`typings.person.Person`
    * Function      :py:class:`typings.function.Function`
    * Organization  :py:class:`typings.organization.Organization`
    * Competence    :py:class:`typings.competence.Competence`
    * License       :py:class:`typings.license.License`

    :py:meth:`.run` uses the :py:meth:`pymongo.collection.Collection.watch` context to read the oplog change stream and
    react to each insert. :py:meth:`.run` checks for a :py:attr:`.resume_token` in :py:attr:`.resume_token_path` and
    tries to resume from it if it exists and is valid. :py:meth:`.run` blocks forever or until :py:attr:`max_restarts`
    is reached.

    .. caution::

        If :py:attr:`resume_token` is out of date or does not match the oplog, it will be deleted and the stream will
        not be able to resume.

    .. note::

        To recover from errors such as shutdowns or an out-of-date :py:attr:`.resume_token`, the class implements
        :py:meth:`.recover`, which fetches the outstanding change messages through :py:mod:`eve_api` and replays them.

    Usage::

        from stream import NifStream
        stream = NifStream()
        stream.recover(errors=False)  # fix remaining change messages
        stream.run()  # Blocks forever

    """
    def __init__(self):

        self.log = AppLogger(name='nif-stream',
                             stdout=False,
                             last_logs=0,
                             restart=True)

        self.restarts = 0
        self.max_restarts = 10
        self.token_reset = False

        self.resume_token = None
        self.resume_token_path = Path(STREAM_RESUME_TOKEN_FILE)
        self.resume_token_lock = False

        self.tz_local = tz.gettz("Europe/Oslo")
        self.tz_utc = tz.gettz('UTC')

        # Lungo Api
        self.api_collections = {
            'Person': {
                'url': '{}/persons/process'.format(API_URL),
                'id': 'id'
            },
            'Function': {
                'url': '{}/functions/process'.format(API_URL),
                'id': 'id'
            },
            'Organization': {
                'url': '{}/organizations/process'.format(API_URL),
                'id': 'id'
            },
            'Competence': {
                'url': '{}/competences/process'.format(API_URL),
                'id': 'id'
            },
            'License': {
                'url': '{}/licenses/process'.format(API_URL),
                'id': 'id'
            },
            'Changes': {
                'url': '{}/integration/changes'.format(API_URL),
                'id': 'id'
            },
        }

        # NIF Api
        # Needs one of the clubs? Using platform user!
        self.api_license = NifApiIntegration(username=NIF_FEDERATION_USERNAME,
                                             password=NIF_FEDERATION_PASSWORD,
                                             log_file=STREAM_LOG_FILE,
                                             realm=NIF_REALM)

        self.api_competence = NifApiCompetence(
            username=NIF_FEDERATION_USERNAME,
            password=NIF_FEDERATION_PASSWORD,
            log_file=STREAM_LOG_FILE,
            realm=NIF_REALM)

        self.api = NifApiIntegration(username=ACLUBU,
                                     password=ACLUBP,
                                     log_file=STREAM_LOG_FILE,
                                     realm=NIF_REALM)

        status, hello = self.api._test()
        if status is not True:
            self.log.error('[TERMINATING] Problems with NIF authentication')
            sys.exit(1)  # exit non-zero so a supervisor can notice the failure

        # Change stream
        client = pymongo.MongoClient()
        self.db = client.ka

    def recover(self, errors=False, realm=NIF_REALM):
        """Get change messages with status:

        errors=True
        * pending
        * error

        errors=False
        * ready

        via :py:mod:`eve_api` and process the messages in :py:meth:`._process_change`

        Sets the :py:attr:`.resume_token_lock` to avoid writing a :py:attr:`.resume_token` on updates.
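
        Usage::

            stream.recover(errors=True)   # replay messages stuck in pending/error
            stream.recover(errors=False)  # process messages still in ready state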
        """
        r = None
        self.resume_token_lock = True
        if errors is True:
            try:
                r = requests.get(
                    '%s?where={"_status": {"$in": ["pending", "error"]},"_realm":"%s"}&max_results=100000'
                    % (self.api_collections['Changes']['url'], realm),
                    headers=API_HEADERS)
            except Exception as e:
                self.log.exception(
                    'Exception getting error and pending changes in recover')
        else:
            try:
                r = requests.get(
                    '%s?where={"_status": {"$in": ["ready"]},"_realm":"%s"}&max_results=100000'
                    % (self.api_collections['Changes']['url'], realm),
                    headers=API_HEADERS)
            except Exception as e:
                self.log.exception(
                    'Exception getting ready changes in recover')

        if r and r.status_code == 200:

            slow = r.json()

            for change in slow['_items']:
                self._process_change(ChangeStreamItem(change))

        self.resume_token_lock = False

    def _process_change(self, change) -> bool:
        """
        Process a change message. Calls the ``get_<entity_type>`` method on the matching NIF api client for the
        entity_type of the change message.

        Each branch then calls and returns :py:meth:`._process` with the retrieved object as payload.

        :param change: the change message
        :type change: ChangeStreamItem
        :return: True on success
        :rtype: bool
        """

        status = False

        if change.set_status('pending'):
            try:
                # Get object from nif_api
                if change.entity_type == 'Person':
                    status, result = self.api.get_person(change.get_id())

                elif change.entity_type == 'Function':
                    status, result = self.api.get_function(change.get_id())

                elif change.entity_type == 'Organization':
                    status, result = self.api.get_organization(
                        change.get_id(), NLF_ORG_STRUCTURE)

                elif change.entity_type == 'License':
                    status, result = self.api_license.get_license(
                        change.get_id())

                elif change.entity_type == 'Competence':
                    status, result = self.api_competence.get_competence(
                        change.get_id())

                # Insert into Lungo api
                if status is True:
                    pstatus, pmessage = self._process(result, change)
                    if pstatus is True:  # Sets the change message status
                        change.set_status('finished')
                        return True
                    else:
                        change.set_status('error', pmessage)

                else:
                    self.log.error(
                        'NIF API error for {} ({}) change message: {}'.format(
                            change.entity_type, change.id, change._id))
                    change.set_status('error', 'Got status {} for {} {}'.format(
                        status, change.entity_type,
                        change.get_id()))  # Error in nif_api

            except Exception as e:
                self.log.exception('Error in process change')
                change.set_status('error', {'exception': str(e)})
        else:
            self.log.error('Cannot set change message status to pending')
            raise Exception('Cannot set change message status to pending')

        return False

    def run(self):
        """Read the mongo change stream

        On all `insert` operations to `integration/changes` we retrieve the full document (change message) and act
        accordingly via :py:meth:`_process_change`

        .. note::
            Starting with MongoDB 4.2, `startAfter` can be used instead of `resumeAfter`; unlike `resumeAfter`,
            `startAfter` can also resume a stream after an invalidate event.

        """

        self.log.debug('[Stream started]')
        self._read_resume_token()

        if self.resume_token is not None:
            resume_after = {'_data': self.resume_token}
            self.log.debug('Got resume token')
        else:
            resume_after = None
            self.log.debug('No resume token')

        try:
            # @TODO on upgrade to mongo 4.2 use startAfter instead
            self.log.debug('Change stream watch starting...')
            with self.db.integration_changes.watch(
                    pipeline=[{
                        '$match': {
                            'operationType': 'insert'
                        }
                    }],
                    resume_after=resume_after) as stream:

                for change in stream:
                    if change['fullDocument']['_realm'] == NIF_REALM:
                        self.log.debug(
                            'Processing change message: {} {}'.format(
                                change['fullDocument']['entity_type'],
                                change['fullDocument']['id']))

                        # Always set new resume token
                        self.resume_token = change['_id']['_data']

                        if self._process_change(
                                ChangeStreamItem(
                                    change['fullDocument'])) is True:
                            self.log.debug('Successfully processed')
                            self._write_resume_token()

                self.restarts = 0

        except pymongo.errors.PyMongoError as e:
            self.log.error('Unrecoverable PyMongoError, restarting')
            self.restarts += 1
            if self.restarts > self.max_restarts:
                self.log.error('Too many retries')
            else:
                self.run()

        except Exception as e:
            self.log.exception('Unknown error in change stream watch')

            self.restarts += 1
            if self.restarts > self.max_restarts:
                self.log.error('Too many restarts: {}'.format(self.restarts))

                if not self.token_reset:
                    self.log.error('Resetting resume token')
                    self._reset_token()
                    self.run()
            elif not self.token_reset:
                self.run()

    def _merge_dicts(self, x, y):
        """Simple and safe merge dictionaries

        :param x: first dictionary
        :type x: dict
        :param y: second dictionary
        :type y: dict
        :return: merged dict z
        :rtype: dict
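
        Example (``y`` wins on key collisions)::

            self._merge_dicts({'a': 1, 'b': 1}, {'b': 2})  # -> {'a': 1, 'b': 2}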
        """
        z = x.copy()  # start with x's keys and values
        z.update(y)  # modifies z with y's keys and values & returns None

        return z

    def _merge_user_to(self, id, merged):
        """Update all persons in api which ``id`` is merged from.

        .. note::
            NIF describes ``merged_from`` which makes no sense. For the api to be able to hand out a 301 each object
            merged to ``id`` needs a ``merged_to: id``. This can be acted upon and resolves unlimited levels of merging.

        :param id: Person id
        :type id: int
        :param merged: List of person id's merged from to id
        :type merged: list[int]
        :return: None
        :rtype: None
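
        Example::

            # persons 98 and 99 were merged into person 100
            self._merge_user_to(100, [98, 99])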
        """

        if len(merged) > 0:

            for m in merged:

                u = requests.get('%s/%s' %
                                 (self.api_collections['Person']['url'], m),
                                 headers=API_HEADERS)

                if u.status_code == 200:
                    u_json = u.json()

                    u_u = requests.patch(
                        '%s/%s' %
                        (self.api_collections['Person']['url'], u_json['_id']),
                        json={'_merged_to': id},
                        headers=self._merge_dicts(
                            API_HEADERS, {'If-Match': u_json['_etag']}))

                    if u_u.status_code == 200:
                        pass
                elif u.status_code == 404:
                    """Not found, create a user!"""
                    u_p = requests.post(self.api_collections['Person']['url'],
                                        json={
                                            'id': m,
                                            '_merged_to': id
                                        },
                                        headers=API_HEADERS)

                    if u_p.status_code != 201:
                        self.log.error('Error merge to {}'.format(u_p.text))

    def _write_resume_token(self):
        """Writes the current :py:attr:`resume_token` to :py:attr:`resume_token_path` file"""

        if self.resume_token_lock is not True:

            try:
                with open(STREAM_RESUME_TOKEN_FILE, 'wb+') as f:
                    f.write(self.resume_token)
            except Exception as e:
                self.log.exception('Could not write resume token')

    def _read_resume_token(self):
        """Reads the value of :py:attr:`resume_token_path` file into :py:attr:`resume_token`"""
        try:
            with open(STREAM_RESUME_TOKEN_FILE, 'rb') as f:
                self.resume_token = f.read()
        except FileNotFoundError:
            self.resume_token = None
        except Exception:
            self.log.exception('Error reading resume token')
            self.resume_token = None

    def _reset_token(self, delete=True):
        """Deletes or truncates the :py:attr:`resume_token_path` file

        :param delete: If True then delete file, else truncate
        :type delete: bool
        """

        if delete is True:
            try:
                self.resume_token_path.unlink()
            except Exception:
                self.log.exception('Could not delete resume token')
        else:
            try:
                with open(STREAM_RESUME_TOKEN_FILE, 'wb+') as f:
                    f.write(b'')  # binary mode requires bytes
            except Exception:
                self.log.exception('Could not truncate resume token')

        self.token_reset = True

    def _process(self, payload, change):
        """
        Update or create a document based on ``payload``

        :param payload: The payload for the document
        :type payload: dict
        :param change: The change object
        :type change: ChangeStreamItem
        :return: True on success
        :rtype: bool
        """
        entity_conf = self.api_collections[change.get_value('entity_type')]
        api_document = requests.get(
            '%s/%s' % (entity_conf['url'], payload[entity_conf['id']]),
            headers=API_HEADERS)
        rapi = False

        # Does not exist, insert
        if api_document.status_code == 404:

            # Geocode
            if change.get_value(
                    'entity_type') == 'Person' and STREAM_GEOCODE is True:
                payload = add_person_location(payload)
            elif change.get_value(
                    'entity_type'
            ) == 'Organization' and STREAM_GEOCODE is True:
                payload = add_organization_location(payload)

            rapi = requests.post(
                self.api_collections[change.get_value('entity_type')]['url'],
                data=json.dumps(payload, cls=EveJSONEncoder),
                headers=API_HEADERS)

        # Do exist, replace
        elif api_document.status_code == 200:

            api_existing_object = api_document.json()

            # Only update if newer
            if dateutil.parser.parse(
                    api_existing_object['_updated']) < change.get_modified(
                    ).replace(tzinfo=self.tz_local):

                # Geocode
                if change.get_value(
                        'entity_type') == 'Person' and STREAM_GEOCODE is True:
                    payload = add_person_location(payload)

                # Really need to preserve the activities for clubs type_id 5
                if change.get_value(
                        'entity_type') == 'Organization' and payload.get(
                            'type_id', 0) == 5:
                    payload.pop('activities', None)
                    payload.pop('main_activity', None)
                    rapi = requests.patch(
                        '%s/%s' %
                        (self.api_collections[change.get_value('entity_type')]
                         ['url'], api_existing_object['_id']),
                        data=json.dumps(payload, cls=EveJSONEncoder),
                        headers=self._merge_dicts(
                            API_HEADERS,
                            {'If-Match': api_existing_object['_etag']}))
                else:
                    rapi = requests.put(
                        '%s/%s' %
                        (self.api_collections[change.get_value('entity_type')]
                         ['url'], api_existing_object['_id']),
                        data=json.dumps(payload, cls=EveJSONEncoder),
                        headers=self._merge_dicts(
                            API_HEADERS,
                            {'If-Match': api_existing_object['_etag']}))

            # If obsolete, just return
            else:
                change.set_status('finished')
                return True, None

        else:
            # Unexpected http status from the lookup; bail out before touching
            # rapi, which would otherwise still be False below
            return False, {'message': 'Got http {} looking up existing document'.format(api_document.status_code)}

        # If successful put or post
        if rapi.status_code in [200, 201]:

            if change.get_value('entity_type') == 'Person':

                rapi_json = rapi.json()

                # Add merged to for all merged from
                if len(change.merged_from) > 0:
                    self._merge_user_to(rapi_json['id'], change.merged_from)

            return True, None

        else:

            self.log.error(
                'Error in _process for {} with id {} change {}'.format(
                    change.get_value('entity_type'), change.id,
                    change.get_value('_id')))
            self.log.error('Error: http {} said {}'.format(
                rapi.status_code, rapi.text))

            try:
                rapi_json = rapi.json()
                if '_issues' in rapi_json:
                    return False, rapi_json['_issues']
            except Exception as e:
                rapi_json = {
                    '_issues': {
                        'message':
                        'Unknown error, got http {} with error {}'.format(
                            rapi.status_code, str(e))
                    }
                }

        return False, rapi_json.get('_issues', 'Unknown error in _process')
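
For reference, the resume pattern `run()` is built on can be reduced to a few lines. A minimal sketch, assuming a replica set with change streams enabled; `handle()`, the token file name and the example collection are placeholders, not part of the class above:

# Minimal sketch of the resume-token pattern used by NifStream.run().
# handle() and TOKEN_FILE are illustrative placeholders.
import pymongo

TOKEN_FILE = 'resume_token.txt'

def watch_with_resume(collection, handle):
    try:
        # On recent server/driver versions the token's _data is a hex string
        with open(TOKEN_FILE) as f:
            resume_after = {'_data': f.read().strip()}
    except FileNotFoundError:
        resume_after = None  # no token yet: start from "now"

    with collection.watch(pipeline=[{'$match': {'operationType': 'insert'}}],
                          resume_after=resume_after) as stream:
        for change in stream:
            handle(change['fullDocument'])
            # Persist only after successful processing, so a crash replays
            # the last unprocessed event on restart
            with open(TOKEN_FILE, 'w') as f:
                f.write(change['_id']['_data'])

# e.g. watch_with_resume(pymongo.MongoClient().ka.integration_changes, print)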
Example #2
class NifIntegrationUser:
    """Handles retrieving existing or creating new integration users for use with :py:mod:`sync.NifSync` and
    :py:mod:`stream.NifStream`

    .. attention::
        If a user does not exist, it will be created on init.

    .. note::
        This class only handles integration users on club level (org_type_id=5) and requires a platform user in NIF.

    .. attention::
        It might take 180 seconds (or more) from creating a user before that user can be authenticated
    """
    def __init__(self,
                 club_id,
                 create_delay=1,
                 log_file='integration_user.log'):

        self.ALPHNUM = ('abcdefghijklmnopqrstuvwxyz' +
                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789')

        self.username = None
        self.password = None
        self.nif_user = {}
        self.user_id = None
        self.club_created = None
        self.club_name = None
        self.log_file = log_file

        self.log = AppLogger('klubb-{0}'.format(club_id))

        self.log.debug('[Integration user]')

        self.club_id = club_id

        self.test_client = None

        api_user = requests.get(
            '%s/integration/users/?where={"club_id": %s, "_active": true, "_realm": "%s"}&max_results=1000'
            % (API_URL, self.club_id, NIF_REALM),
            headers=API_HEADERS)

        if api_user.status_code == 200:
            api_users_json = api_user.json()

            if len(api_users_json['_items']) > 1:  # multiple users
                self.log.error(
                    'More than one active integration user in realm {} for club id {}'
                    .format(NIF_REALM, self.club_id))
                raise NifIntegrationUserError(
                    'More than one active integration user')

            elif len(api_users_json['_items']) == 1:  # One user only

                api_user_json = api_users_json['_items'][0]

                # The format string is redacted in the source; the documented
                # full-path username format is 'app_id/function_id/username'
                self.username = '{0}/{1}/{2}'.format(
                    NIF_CLUB_APP_ID, api_user_json['function_id'],
                    api_user_json['username'])

                self.password = api_user_json['password']
                self.user_id = api_user_json['id']
                self.log.debug('Using existing integration user {}'.format(
                    self.username))

                if 'club_created' in api_user_json:
                    self.club_created = api_user_json['club_created']
                    self.club_name = api_user_json['club_name']
                else:
                    self.club_created, self.club_name = self._get_club_details(
                    )

                if create_delay > 0:
                    if self._time_authentication(
                            create_delay=create_delay) is True:
                        self.log.debug('Authentication ok')
                    else:
                        # @TODO set user _active = False?
                        self.log.error('Failed authentication via Hello')
                        time.sleep(5)

            elif len(api_users_json['_items']
                     ) == 0:  # No users found but 200 anyway
                """Not found create user!"""
                self.log.debug(
                    'No existing integration user found but http 200, creating...'
                )

                self.club_created, self.club_name = self._get_club_details()

                if self._create():
                    self.log.debug(
                        'Created integration user for club id {}'.format(
                            self.club_id))

                    if create_delay > 0:
                        self._time_authentication(create_delay=create_delay)

                else:
                    raise NifIntegrationUserCreateError
            else:
                self.log.error(
                    'Creation of user for club id {} failed, got {} users and http 200'
                    .format(self.club_id, len(api_users_json['_items'])))

        elif api_user.status_code == 404:
            """Not found create user!"""
            self.log.debug('No existing integration user found, creating...')

            self.club_created, self.club_name = self._get_club_details()

            if self._create():
                self.log.debug(
                    'Created integration user for club id {}'.format(
                        self.club_id))

                # if create_delay > 0:
                #    self.log('Delaying {}s before proceeding'.format(create_delay))
                #    time.sleep(create_delay)

                # Test authentication of user
                # Only if create_delay > 0
                # This will try to loop in intervals until authenticated or timeout

                if create_delay > 0:
                    self._time_authentication(create_delay=create_delay)

            else:
                raise NifIntegrationUserCreateError

        else:
            self.log.error('Unknown error, got http {} looking up integration user'
                           .format(api_user.status_code))
            raise NifIntegrationUserError

    def _time_authentication(self, create_delay) -> bool:
        """Poll :py:meth:`test_login` in 10 second steps until the user authenticates or the attempt times out"""

        self.log.debug('Running auth test for {} with password {}'.format(
            self.username, self.password))

        authenticated = False
        time_spent = 0
        increment = 10

        while not authenticated:
            print('.')
            if time_spent > create_delay:
                self.log.debug(
                    'Could not authenticate user after {} seconds'.format(
                        time_spent))
                raise NifIntegrationUserAuthenticationError(
                    'Can not authenticate user')

            authenticated = self.test_login()
            time.sleep(increment)
            time_spent += increment

            if time_spent > 220:
                self.log.debug(
                    'Could NOT authenticate user after {} seconds, exiting'.
                    format(time_spent))
                break

        if authenticated:
            self.log.debug(
                'Authenticated user after {} seconds'.format(time_spent))

        return authenticated

    def _get_club_details(self):
        """Fetch ``created`` and ``name`` for the club from the api, with static fallbacks on failure"""

        response = requests.get('{}/organizations/{}'.format(
            API_URL, self.club_id),
                                headers=API_HEADERS)

        if response.status_code == 200:

            r = response.json()

            if 'created' in r and 'name' in r:
                return r['created'], r['name']

        return '1995-10-11T22:00:00.000000Z', 'Unknown name'

    def _create(self):
        """Creates an integration user applied to club_id
        Firstname: NLF<app id>-<club_id>
        Username: IGNLF<app_id>-<club_id>
        Only needs correct function Id
        User will end up as club user anyway"""

        sync_client = NifApiSynchronization(username=NIF_PLATFORM_USERNAME,
                                            password=NIF_PLATFORM_PASSWORD,
                                            realm=NIF_REALM,
                                            log_file=self.log_file)

        self.password = self.generate_password()
        # Format string redacted in the source; '<prefix>-<club_id>' assumed
        # from the FirstName pattern below and the docstring above
        club_username = '{0}-{1}'.format(NIF_CLUB_USERNAME_PREFIX,
                                         self.club_id)

        # Log user to be created
        self.log.debug('Creating integration user')
        self.log.debug('Club:   {}'.format(self.club_name))
        self.log.debug('User:   {}'.format(club_username))
        self.log.debug('Pwd:    {}'.format(self.password))

        # The exception is caught in the call to _create.
        status, self.nif_user = sync_client.create_integration_user(
            FirstName='{0}-{1}'.format(NIF_CLUB_FIRSTNAME_PREFIX,
                                       self.club_id),
            LastName='NIF.Connect',
            OrgId=self.club_id,
            Password=self.password,
            UserName=club_username)
        # Email=None)

        if status is True:

            self.log.debug(
                'Created integration user {0} with password {1}'.format(
                    club_username, self.password))

            # Get correct function id
            function_id = 0
            if 'ActiveFunctions' in self.nif_user and 'Function' in self.nif_user[
                    'ActiveFunctions']:

                for f in self.nif_user['ActiveFunctions']['Function']:

                    if (f['FunctionTypeId'] == NIF_INTEGRATION_FUNCTION_TYPE_ID
                            and f['ActiveInOrgId'] == self.club_id):
                        function_id = f['Id']
                        break  # break for

            # If no function id use platform function id which works.
            if function_id == 0:
                function_id = NIF_PLATFORM_FUNCTION_ID

            # assign the full-path username 'app_id/function_id/username'
            # (format string redacted in the source)
            self.username = '{0}/{1}/{2}'.format(NIF_CLUB_APP_ID, function_id,
                                                 club_username)

            # Assign correct user_id
            if 'Id' in self.nif_user:
                self.user_id = self.nif_user['Id']  # Or PersonId?
            elif 'PersonId' in self.nif_user:
                self.user_id = self.nif_user['PersonId']
            else:
                self.log.error('Could not assign user_id to integration user')
                raise NifIntegrationUserCreateError

            # Store the integration user in Lungo
            payload = {
                'username': club_username,
                'password': self.password,
                'id': self.user_id,
                'app_id': NIF_CLUB_APP_ID,
                'function_id': function_id,
                'club_id': self.club_id,
                'club_name': self.club_name,
                'modified': self.nif_user['LastChangedDate'],
                'club_created': self.club_created,
                '_realm': NIF_REALM,
                '_active': True
            }

            api_user = requests.post('{}/integration/users'.format(API_URL),
                                     data=json.dumps(payload,
                                                     cls=EveJSONEncoder),
                                     headers=API_HEADERS)
            if api_user.status_code == 201:
                self.log.debug('Successfully created user in Lungo')
                return True  # Instead of returning void, return something else
            else:
                self.log.error('Could not create user in Lungo')
                self.log.error('Code:   {}'.format(api_user.status_code))
                self.log.error('Msg:    {}'.format(api_user.text))
                raise NifIntegrationUserCreateError

        else:
            self.log.error(
                'Could not create integration user {0} with password {1}'.
                format(club_username, self.password))
            self.log.error('NIF Api said:')
            self.log.error('Code:   {}'.format(self.nif_user['ErrorCode']))
            self.log.error('Message:{}'.format(self.nif_user['ErrorMessage']))
            raise NifIntegrationUserCreateError


    def get_user(self):

        pass

    def generate_password(self, count=1, length=12):
        """ Generate password
        Kwargs:
            count (int)::
                How many passwords should be returned?
            length (int)::
                How many characters should the password contain
            allowed_chars (str)::
                Characters
        Returns:
            String with the password. If count > 1 then the return value will be auto_now=
            list of strings.
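
        Example::

            self.generate_password()         # one 12-character string
            self.generate_password(count=3)  # list of three strings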
        """
        if count == 1:
            return ''.join(sample(self.ALPHNUM, length))

        passwords = []
        while count > 0:
            passwords.append(''.join(sample(self.ALPHNUM, length)))
            count -= 1

        return passwords
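
A short usage sketch tying this class to the sync worker from Example #5 below. The ``club_id`` value is illustrative; the attribute names are those set in ``__init__`` above:

# Hedged usage sketch: create (or fetch) a club integration user and hand
# its credentials to a NifSync worker (see Example #5). club_id is made up.
user = NifIntegrationUser(club_id=12345, create_delay=180)

sync = NifSync(org_id=user.club_id,
               username=user.username,   # full path 'app_id/function_id/username'
               password=user.password,
               created=user.club_created)
sync.start()  # NifSync subclasses threading.Thread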
Example #3
class ApiClient:
    __SNKRS_PER_PAGE = 100
    __API_ENDPOINT = Template(
        f"https://api.thesneakerdatabase.com/v1/sneakers?limit={__SNKRS_PER_PAGE}&page=$page"
    )
    __IMG_SIZE_ARGS = dict(s="thumbUrl", m="smallImageUrl", l="imageUrl")

    def __init__(self,
                 limit: int = sys.maxsize,
                 starting_page: int = 0,
                 image_size: str = "s"):
        try:
            size_arg = ApiClient.__IMG_SIZE_ARGS[image_size]
        except KeyError:  # dict lookups raise KeyError, not IndexError
            raise ValueError(
                f"Argument image_size has to be one of 's' (default), 'm', 'l'! Got '{image_size}' instead."
            )
        if starting_page < 0 or limit < 0:
            raise ValueError(
                "Starting page number and limit must not be negative!")

        self.limit = limit
        self.starting_page = starting_page
        self.image_size = size_arg
        output_dir = Path("images") / "training" / image_size / "sneakers"
        output_dir.mkdir(exist_ok=True, parents=True)
        self.output_dir = output_dir
        self.log = AppLogger("AsyncApiClient")
        self.session = None
        self.download_queue = None
        self.save_queue = None

    async def start_bulk_download(self):
        self.log.info("Starting download...")
        self.download_queue, self.save_queue = asyncio.Queue(), asyncio.Queue()

        tasks = [
            asyncio.create_task(self.__url_fetcher()),
            asyncio.create_task(self.__image_fetcher()),
            asyncio.create_task(self.__image_writer())
        ]

        try:
            self.session = aiohttp.ClientSession()
            await asyncio.gather(*tasks)
        except Exception:
            self.log.error("Unexpected error!")
            self.log.warning("Terminating...")
            raise
        finally:
            await self.session.close()

        self.log.info("All jobs complete!")

    async def __url_fetcher(self):
        n_fetched = 0
        page = itertools.count(self.starting_page)

        while n_fetched < self.limit:
            response = await self.__async_call_with_retry(
                self.session.get,
                self.__API_ENDPOINT.substitute(page=next(page)))
            sneakers = (await
                        self.__async_call_with_retry(response.json))["results"]

            if len(sneakers) == 0:
                self.log.warning(
                    "API ran out of sneakers! Cannot fetch more images.")
                break

            image_urls = [
                sneaker["media"][self.image_size] for sneaker in
                sneakers[:min(self.__SNKRS_PER_PAGE, self.limit - n_fetched)]
                if sneaker["media"][self.image_size] is not None
            ]

            for image_url in image_urls:
                self.download_queue.put_nowait(image_url)
                n_fetched += 1

        self.download_queue.put_nowait(None)
        self.log.info("Fetching urls complete!")

    async def __image_fetcher(self):
        while True:
            image_url = await self.download_queue.get()
            if image_url is None:
                break

            image = await self.__async_call_with_retry(self.session.get,
                                                       image_url)
            self.save_queue.put_nowait(image)

        self.download_queue.task_done()
        self.save_queue.put_nowait(None)
        self.log.info("Fetching images complete!")

    async def __image_writer(self):
        while True:
            image = await self.save_queue.get()
            if image is None:
                break

            filepath = self.output_dir / Path(image.url.path).name
            content = await self.__async_call_with_retry(image.read)

            async with aiofiles.open(filepath, "wb") as f:
                self.log.debug(f"Saving {filepath.name}...")
                await f.write(content)

        self.save_queue.task_done()
        self.log.info("Saving images complete!")

    @staticmethod
    @retry(wait=wait_random_exponential(multiplier=3),
           stop=stop_after_attempt(3))
    async def __async_call_with_retry(func: callable, *args, **kwargs):
        ret = await func(*args, **kwargs)
        return ret
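
``start_bulk_download`` is a coroutine, so it has to be driven by an event loop. A minimal invocation (the limit value is illustrative):

import asyncio

client = ApiClient(limit=500, image_size="m")
asyncio.run(client.start_bulk_download())  # saves into images/training/m/sneakers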
Example #4
class Base(object):
    STATUS = {
        'WAITING': 0,
        'QUEUED': 10,
        'PROCESSING': 20,
        'SUCCESS': 30,
        'SKIPPED': 40,
        'UNSTABLE': 50,
        'TIMEOUT': 60,
        'CANCELLED': 70,
        'FAILED': 80
    }

    def __init__(self, module_name):
        self.module = module_name
        self.config = Config()
        self.log = AppLogger(self.config, self.module)

    def command(self, cmd, working_dir, script=False):
        self.log.info('Executing command: {0}\nDir: {1}'.format(
            cmd, working_dir))
        if script:
            self.log.debug('Executing user command')
            return self.__exec_user_command(cmd, working_dir)
        else:
            self.log.debug('Executing system command')
            return self.__exec_system_command(cmd, working_dir)

    def __exec_system_command(self, cmd, working_dir):
        self.log.debug('System command runner \nCmd: {0}\nDir: {1}'.format(
            cmd, working_dir))
        cmd = '{0} 2>&1'.format(cmd)
        self.log.info('Running {0}'.format(cmd))

        proc = None
        try:
            proc = subprocess.Popen(
                cmd, shell=True,
                stdout=subprocess.PIPE,
                cwd=working_dir,
                env=os.environ.copy(),
                universal_newlines=True)
            stdout, _ = proc.communicate()  # stderr is merged into stdout by the 2>&1 above
            returncode = proc.returncode
            if returncode != 0:
                command_status = self.STATUS['FAILED']
                self.log.debug(stdout)
            else:
                self.log.debug('System command completed {0}\nOut:{1}'.format(
                    cmd, stdout))
                command_status = self.STATUS['SUCCESS']
                #self.log.log_command_op(stdout)
                self.log.debug(stdout)
            self.log.debug('Command completed {0}'.format(cmd))
        except Exception as exc:
            error_message = 'Error running system command. Err: {0}'.format(exc)
            self.log.error(error_message)
            trace = traceback.format_exc()
            self.log.error(exc)
            self.log.error(trace)
            raise Exception(error_message)
        self.log.debug('Returning command status: {0}'.format(command_status))
        return command_status

    def __exec_user_command(self, cmd, working_dir):
        self.log.debug('Executing streaming command {0}'.format(cmd))
        current_step_state = self.STATUS['FAILED']

        command_thread_result = {
            'success': False,
            'returncode': None
        }

        command_thread = threading.Thread(
            target=self.__command_runner,
            args=(cmd, working_dir, command_thread_result,))

        command_thread.start()

        self.log.debug('Waiting for command thread to complete')
        command_thread.join(self.config['MAX_COMMAND_SECONDS'])
        self.log.debug('Command thread join has returned. Result: {0}'\
                .format(command_thread_result))

        if command_thread.is_alive():
            self.log.log_command_err('Command timed out')
            self.log.error('Command thread is still running')
            is_command_success = False
            current_step_state = self.STATUS['TIMEOUT']
            self.log.log_command_err('Command thread timed out')
        else:
            self.log.debug('Command completed {0}'.format(cmd))
            is_command_success = command_thread_result['success']
            if is_command_success:
                self.log.debug('command executed successfully: {0}'.format(cmd))
                current_step_state = self.STATUS['SUCCESS']
            else:
                error_message = 'Command failed : {0}'.format(cmd)
                exception = command_thread_result.get('exception', None)
                if exception:
                    error_message += '\nException {0}'.format(exception)
                self.log.error(error_message)
                current_step_state = self.STATUS['FAILED']
                self.log.error(error_message)

        self.log.flush_console_buffer()

        return current_step_state

    def __command_runner(self, cmd, working_dir, result):
        self.log.debug('command runner \nCmd: {0}\nDir: {1}'.format(
            cmd, working_dir))
        cmd = '{0} 2>&1'.format(cmd)
        self.log.info('Running {0}'.format(cmd))

        proc = None
        success = False
        try:
            proc = subprocess.Popen(
                cmd, shell=True,
                stdout=subprocess.PIPE,
                cwd=working_dir,
                env=os.environ.copy(),
                universal_newlines=True)

            exception = 'Invalid or no script tags received'
            current_group_info = None
            current_group_name = None
            current_cmd_info = None
            for line in iter(proc.stdout.readline, ''):
                self.log.debug(line)
                line_split = line.split('|')
                if line.startswith('__SH__GROUP__START__'):
                    current_group_info = line_split[1]
                    current_group_name = '|'.join(line_split[2:])
                    current_group_info = json.loads(current_group_info)
                    show_group = current_group_info.get('is_shown', True)
                    if show_group == 'false':
                        show_group = False
                    console_out = {
                        'consoleId': current_group_info.get('id'),
                        'parentConsoleId': '',
                        'type': 'grp',
                        'message': current_group_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False,
                        'isShown': show_group
                    }
                    self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__CMD__START__'):
                    current_cmd_info = line_split[1]
                    current_cmd_name = '|'.join(line_split[2:])
                    current_cmd_info = json.loads(current_cmd_info)
                    parent_id = current_group_info.get('id') if current_group_info else None
                    console_out = {
                        'consoleId': current_cmd_info.get('id'),
                        'parentConsoleId': parent_id,
                        'type': 'cmd',
                        'message': current_cmd_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__CMD__END__'):
                    current_cmd_end_info = line_split[1]
                    current_cmd_end_name = '|'.join(line_split[2:])
                    current_cmd_end_info = json.loads(current_cmd_end_info)
                    parent_id = current_group_info.get('id') if current_group_info else None
                    is_completed = False
                    if current_cmd_end_info.get('completed') == '0':
                        is_completed = True
                    console_out = {
                        'consoleId': current_cmd_info.get('id'),
                        'parentConsoleId': parent_id,
                        'type': 'cmd',
                        'message': current_cmd_end_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': is_completed
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__GROUP__END__'):
                    current_grp_end_info = line_split[1]
                    current_grp_end_name = '|'.join(line_split[2:])
                    current_grp_end_info = json.loads(current_grp_end_info)
                    is_completed = False
                    if current_grp_end_info.get('completed') == '0':
                        is_completed = True
                    console_out = {
                        'consoleId': current_group_info.get('id'),
                        'parentConsoleId': '',
                        'type': 'grp',
                        'message': current_grp_end_name,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': is_completed
                    }
                    self.log.append_console_buffer(console_out)
                elif line.startswith('__SH__SCRIPT_END_SUCCESS__'):
                    success = True
                    break
                elif '__SH__ARCHIVE_END__' in line:
                    success = True
                    break
                elif line.startswith('__SH__SCRIPT_END_FAILURE__'):
                    if current_group_info:
                        console_out = {
                            'consoleId': current_group_info.get('id'),
                            'parentConsoleId': '',
                            'type': 'grp',
                            'message': current_group_name,
                            'msgTimestamp': self.__get_timestamp(),
                            'completed': False
                        }
                        self.log.append_console_buffer(console_out)
                    success = False
                    exception = 'Script failure tag received'
                    break
                else:
                    parent_id = current_cmd_info.get('id') if current_cmd_info else None
                    console_out = {
                        'consoleId': str(uuid.uuid4()),
                        'parentConsoleId': parent_id,
                        'type': 'msg',
                        'message': line,
                        'msgTimestamp': self.__get_timestamp(),
                        'completed': False
                    }
                    if parent_id:
                        self.log.append_console_buffer(console_out)
                    else:
                        self.log.debug(console_out)

            proc.kill()
            if not success:
                self.log.debug('Command failure')
                result['returncode'] = 99
                result['success'] = False
                result['exception'] = exception
            else:
                self.log.debug('Command successful')
                result['returncode'] = 0
                result['success'] = True
        except Exception as exc:
            self.log.error('Exception while running command: {0}'.format(exc))
            trace = traceback.format_exc()
            self.log.error(trace)
            result['returncode'] = 98
            result['success'] = False
            result['exception'] = trace

        self.log.info('Command returned {0}'.format(result['returncode']))

    def __get_timestamp(self):
        return int(time.time() * 1000000)

    def pop_step(self, execute_plan, step):
        self.log.debug('popping the top of stack: {0}'\
                .format(execute_plan['steps']))
        try:
            for k in list(execute_plan['steps'].keys()):  # copy keys: we delete while iterating
                if k == step['step_key']:
                    del execute_plan['steps'][k]
                    self.log.debug('popped out top of stack. \n stack {0}'\
                        .format(execute_plan['steps']))
            return execute_plan
        except Exception as exc:
            self.log.error('error occurred while popping ' \
                'step: {0}'.format(str(exc)))
            raise exc

    def get_top_of_stack(self, execute_plan):
        error_occurred = False
        error_message = ''
        try:
            self.log.info('inside get_top_of_stack')
            steps = execute_plan.get('steps', None)
            if steps is None:
                error_message = 'No steps found in the execute plan: {0}'\
                        .format(execute_plan)
                error_occurred = True
                return
            if len(steps) == 0:
                self.log.info('No steps present in execute plan, returning ' \
                    'empty TopOfStack')
                return None
            keys = []
            for k in steps.keys():
                keys.append(int(str(k)))

            self.log.debug('steps keys {0}'.format(keys))
            keys.sort()
            self.log.debug('sorted keys {0}'.format(keys))
            current_step_key = str(keys[0])
            current_step = steps[current_step_key]
            current_step['step_key'] = current_step_key
            return current_step
        except Exception as exc:
            error_message = 'Error occurred while trying to get the step' \
                    ' from execute plan \nError: {0} execute plan: {1}' \
                    .format(str(exc), execute_plan)
            error_occurred = True
        finally:
            if error_occurred:
                raise Exception(error_message)
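
The streaming protocol parsed by ``__command_runner`` above is line-oriented: a tag, a JSON blob and a display name separated by ``|``. A sketch of a producer emitting it; the tag names come from the parser above, while the exact JSON keys (``id``, ``is_shown``, ``completed``) are inferred from how the parser reads them:

# Hedged sketch of a script producing the tags __command_runner consumes.
# Line layout inferred from the parser: TAG|<json>|<display name>.
import json

def emit(tag, info, name):
    print('{0}|{1}|{2}'.format(tag, json.dumps(info), name))

emit('__SH__GROUP__START__', {'id': 'grp-1', 'is_shown': True}, 'Checkout')
print('cloning repository...')  # untagged lines become "msg" entries (buffered only under a command)
emit('__SH__GROUP__END__', {'id': 'grp-1', 'completed': '0'}, 'Checkout')
print('__SH__SCRIPT_END_SUCCESS__')  # completed == '0' marks success in the parser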
Example #5
class NifSync(threading.Thread):
    """Populate and sync change messages from NIF api

    Currently handles following services and respective entity types:

    * SynchronizationService/GetChanges3:
        * Person
        * Function
        * Organization
    * SynchronizationService/GetChangesCompetence2:
        * Competence
    * SynchronizationService/GetChangesLicense:
        * License

    .. danger::
        The club (org_type_id=5) integration users can only use GetChanges3 ( :py:attr:`sync_type` ='changes'). For
        GetChangesCompetence2 ( :py:attr:`sync_type` ='competence') and
        GetChangesLicense ( :py:attr:`sync_type` = 'license') an integration user on federation level is required

    The class will automatically handle when to :py:meth:`populate` and :py:meth:`sync`.

    .. note::
        :py:meth:`_check` is called on init and checks with the api to find last change message for
        :py:attr:`org_id` and the last message is then used as the initial starting point.


    :param org_id: The integration user organization id, required
    :type org_id: int
    :param login: The full path integration username ('app_id/function_id/username'), required
    :type login: str
    :param password: Password, required
    :type password: str
    :param created: A datetime string representing creation date of org_id, required
    :type created: str
    :param stopper: a threading.Event flag to exit
    :type stopper: threading.Event
    :param restart: On True will reset all AppLogger handlers
    :type restart: bool
    :param background: Sets the scheduler. Defaults to False and BlockingScheduler
    :type background: bool
    :param initial_timedelta: The initial timedelta to use from last change message in ms
    :type initial_timedelta: int
    :param overlap_timedelta: An optional timedelta for overlap functions in hours
    :type overlap_timedelta: int
    :param lock: The semaphore object, if None uses :py:class:`.FakeSemaphore`
    :type lock: threading.BoundedSemaphore
    :param sync_type: The sync type for this user; allowed values are ``changes``, ``competence``, ``license`` and ``federation``. Defaults to ``changes``.
    :type sync_type: str
    :param sync_interval: The interval for the sync scheduler in minutes. Defaults to NIF_SYNC_INTERVAL
    :type sync_interval: int
    :param populate_interval: The interval for populating in days. Defaults to NIF_POPULATE_INTERVAL
    :type populate_interval: int

    Usage - threading::

        from sync import NifSync
        sync = NifSync(org_id, username, password)
        sync.start()  # sync is of threading.Thread

    Usage - blocking::

        from sync import NifSync
        sync = NifSync(org_id, username, password)
        sync.run()  # sync starts without thread running

    Usage - with semaphore::

        import threading
        from sync import NifSync
        bound_semaphore = threading.BoundedSemaphore(value=10)
        sync = NifSync(org_id, username, password, lock=bound_semaphore)
        sync.start()  # sync is of threading.Thread, semaphore has 10 slots

    .. note::
        The usernames and passwords for integration users on club level are stored in integration/users and accessible
        through :py:mod:`
    """
    def __init__(self,
                 org_id,
                 username,
                 password,
                 created,
                 stopper=False,
                 restart=False,
                 background=False,
                 initial_timedelta=0,
                 overlap_timedelta=0,
                 lock=None,
                 sync_type='changes',
                 sync_interval=NIF_CHANGES_SYNC_INTERVAL,
                 populate_interval=NIF_POPULATE_INTERVAL):

        self.state = SyncState()

        # Init thread
        super().__init__(name='klubb-{0}'.format(org_id))

        if sync_type in ['changes', 'license', 'competence', 'federation']:
            self.sync_type = sync_type
        else:
            raise Exception('{} is not a valid sync type'.format(sync_type))

        self.id = org_id
        self.username = username

        self.started = datetime.now()
        self.sync_errors = 0
        # self.sync_errors_max = 3  # Errors in a row!

        self.sync_interval = sync_interval  # minutes
        self.populate_interval = populate_interval  # days

        self.initial_timedelta = initial_timedelta
        self.overlap_timedelta = overlap_timedelta

        self.messages = 0  # Holds number of successfully processed messages

        self.stopper = stopper
        self.background = background

        self.initial_start = None
        self.from_to = [None, None]
        self.sync_started = False

        self.tz_local = tz.gettz(LOCAL_TIMEZONE)
        self.tz_utc = tz.gettz('UTC')

        #  Init logger
        self.log = AppLogger(name='klubb-{0}'.format(org_id),
                             stdout=not background,
                             last_logs=100,
                             restart=restart)

        # No stopper, started directly check for stream resume token!
        if self.stopper is False:
            from pathlib import Path
            resume_token = Path(STREAM_RESUME_TOKEN_FILE)

            if resume_token.is_file() is not True:
                self.log.warning(
                    'No resume token at {}'.format(STREAM_RESUME_TOKEN_FILE))
                self.log.warning(
                    'Requires stream to have or be running and a valid token file'
                )

        if lock is not None and (isinstance(lock, threading.BoundedSemaphore)
                                 or isinstance(lock, threading.Semaphore)):
            self.lock = lock
        else:
            self.lock = FakeSemaphore()  # allows running single-threaded as well

        # Lungo REST API
        self.api_integration_url = '%s/integration/changes' % API_URL

        # Make a startup log entry
        self.log.debug('[STARTUP]')
        self.log.debug('Org_id:     {0}'.format(org_id))
        self.log.debug('Login:      {0}'.format(username))
        self.log.debug('Pwd:        {0}'.format(password))
        self.log.debug('Created:    {0}'.format(created))
        self.log.debug('Skew:       {0} seconds'.format(
            self.initial_timedelta))
        self.log.debug('Sync:       {0} minutes'.format(self.sync_interval))
        self.log.debug('Populate:   {0} days'.format(self.populate_interval))
        self.log.debug('Api url:    {0}'.format(self.api_integration_url))

        # Created
        self.org_created = dateutil.parser.parse(created)
        if self.org_created.tzinfo is None or self.org_created.tzinfo.utcoffset(
                self.org_created) is None:
            """self.org_created is naive, no timezone we assume CET"""
            self.org_created = self.org_created.replace(tzinfo=self.tz_local)

        self.org_id = org_id

        try:
            self.nif = NifApiSynchronization(username,
                                             password,
                                             realm=NIF_REALM,
                                             log_file=SYNC_LOG_FILE,
                                             test_login=False)
        except Exception:
            self.log.exception(
                'Sync client creation for {} failed, terminating'.format(
                    username))
            # sys.exit(0)
            raise Exception('Could not create sync client')

        # Setup job scheduler
        if self.background:
            self.scheduler = BackgroundScheduler()
            self.log.info('Scheduler:  BackgroundScheduler')
        else:
            self.scheduler = BlockingScheduler()
            self.log.info('Scheduler:  BlockingScheduler')

        self.job_misfires = 0
        self.scheduler.add_listener(self._job_fire, EVENT_JOB_EXECUTED)
        self.scheduler.add_listener(self._job_misfire, EVENT_JOB_MISSED)

        self.job = self.scheduler.add_job(self.sync,
                                          'interval',
                                          minutes=self.sync_interval,
                                          max_instances=1)

        self.state.set_state(state='finished')

    def __del__(self):
        """Destructor, shutdown the scheduler on exit"""

        try:
            self.log.debug('Destructor called, terminating thread')
        except Exception:
            pass

        try:
            if self.scheduler.running is True:
                self.scheduler.shutdown(wait=False)
                self.log.debug('Shutting down scheduler')
        except Exception:
            self.log.error('Could not shut down scheduler')

    @property
    def uptime(self) -> tuple:
        """Calculate thread uptime

        :returns: uptime as an integer tuple (days, seconds) since thread start
        """

        t = datetime.now() - self.started
        return t.days, t.seconds

    @property
    def job_next_run_time(self):

        if self.scheduler.state == 1:
            return self.job.next_run_time

        return None

    def job_pause(self):

        if self.scheduler.state == 1:
            self.job.pause()

    def job_resume(self):

        if self.scheduler.state == 1:
            self.job.resume()

    @property
    def scheduler_state(self):

        return self.scheduler.state

    def _job_misfire(self, event) -> None:
        """Callback for failed job execution. Increases :py:attr:`job_misfires`

        :param event: apcscheduler.Event
        """
        self.job_misfires += 1

    def _job_fire(self, event) -> None:
        """Callback for succesfull job execution. Decreases :py:attr:`job_misfires`

        :param event: apcscheduler.Event
        """
        if self.job_misfires > 0:
            self.job_misfires -= 1

    def run(self) -> None:
        """Start the thread, conforms to threading.Thread.start()

        Calls :py:meth:`._check` which determines wether to run :py:meth:`populate` or start a job with target
        :py:meth:`sync`
        """
        self.log.debug('[Starting thread]')
        self._check()

    def _stopper(self, force=False) -> None:
        """If stopper is threading event and is set, then terminate"""

        # Check if too many errors
        if self.sync_errors >= NIF_SYNC_MAX_ERRORS:
            self.state.set_state(mode=self.state.mode,
                                 state='terminated',
                                 reason='too many errors')
            self._shutdown()  # because setting stopper propagates to all!

        if isinstance(self.stopper, threading.Event):

            if self.stopper.is_set():
                self.log.warning('Stopper is set, terminating thread')
                self._shutdown()

        if force is True:
            self.log.warning('Forcing shutdown, terminating thread')
            self._shutdown()

    def _shutdown(self) -> None:
        """Shutdown in an orderly fashion"""

        if self.scheduler.state > 0:
            try:
                self.log.debug('Shutting down scheduler')
                if self.scheduler.running is True:
                    self.scheduler.shutdown(wait=False)  # wait=False
            except Exception as e:
                self.log.exception('Error shutting down scheduler')

        self.log.warning('[TERMINATING]')

        # Terminate instance/thread
        sys.exit(0)

    def _check(self) -> None:
        """Checks to decide to populate or sync on startup

        .. danger::
            On errors from the api, calls :py:meth:`_stopper(force=True)` which will terminate thread.
        """

        self.state.set_state(mode='checking', state='running')
        # @TODO: check if in changes/stream - get last, then use last date retrieved as start_date (-1microsecond)
        changes = requests.get(
            '%s?where={"_org_id":%s, "_realm":"%s"}&sort=[("sequence_ordinal", -1)]&max_results=1'
            % (self.api_integration_url, self.org_id, NIF_REALM),
            headers=API_HEADERS)
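        # Expected Eve-style response body (sketch): {"_items": [{...}]}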

        if changes.status_code == 404:
            # populate, should not happen!
            # self.populate()
            self.log.error('404 from {0}, terminating'.format(
                self.api_integration_url))
            self._stopper(force=True)

        elif changes.status_code == 200:

            r = changes.json()
            c = r['_items']

            if len(c) == 0:
                self.log.debug('No change records, populating')
                self.populate()

            elif len(c) == 1:
                # Check date then decide to populate or not!
                self.log.debug('Got last change records, checking times')

                sequential_ordinal = dateutil.parser.parse(
                    c[0]['sequence_ordinal']).replace(tzinfo=self.tz_utc)

                self.log.debug('Last change message recorded {0}'.format(
                    sequential_ordinal.astimezone(self.tz_local).isoformat()))

                self.initial_start = sequential_ordinal + timedelta(
                    seconds=self.initial_timedelta) - timedelta(
                        hours=self.overlap_timedelta)
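                # initial_timedelta compensates for clock skew; subtracting
                # overlap_timedelta re-fetches a window of changes, and any
                # duplicates will fail gracefully with HTTP 422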

                if self.initial_start.tzinfo is None or self.initial_start.tzinfo.utcoffset(
                        self.initial_start) is None:
                    self.initial_start = self.initial_start.replace(
                        tzinfo=self.tz_local)

                if self.initial_start < datetime.utcnow().replace(
                        tzinfo=self.tz_utc) - timedelta(
                            hours=self.populate_interval):
                    """More than 30 days!"""
                    self.log.debug('More than {} days, populating'.format(
                        self.populate_interval))
                    self.populate()
                    self.state.set_state(mode='populate', state='initialized')
                else:
                    self.log.debug('Less than {} hours, syncing'.format(
                        self.populate_interval))
                    self.job.modify(next_run_time=datetime.now())
                    self.log.debug('Told job to start immediately')
                    self.log.debug('Starting sync scheduler')
                    self.scheduler.start()
                    self.state.set_state(mode='sync', state='started')

        else:
            self.log.error('{0} from {1}, terminating'.format(
                changes.status_code, self.api_integration_url))
            self._stopper(force=True)

    def _eve_fix_sync(self, o) -> dict:
        """Just make soap response simpler

        .. danger::
            Deferred. Uses :py:class:`typings.changes.Changes` instead.
        """

        if o['Changes'] is None:
            o['Changes'] = []
        elif 'ChangeInfo' in o['Changes']:
            o['Changes'] = o['Changes']['ChangeInfo']

            for key, value in enumerate(o['Changes']):
                if o['Changes'][key]['MergeResultOf'] is None:
                    o['Changes'][key]['MergeResultOf'] = []
                elif 'int' in o['Changes'][key]['MergeResultOf']:
                    o['Changes'][key]['MergeResultOf'] = o['Changes'][key][
                        'MergeResultOf']['int']
                else:
                    o['Changes'][key]['MergeResultOf'] = []

        o['_org_id'] = self.org_id

        return o

    def _update_changes(self, changes) -> None:
        """Update change message

        .. note::
            Creates a custom unique '_ordinal' for each change message before trying to insert into api. The purpose
            is to let it gracefully fail with a http 422 if the change message already exists in the api::

                sha224(bytearray(entity_type, id, sequence_ordinal, org_id))

        :param changes: list of change messages
        :type changes: :py:class:`typings.changes.Changes`
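
        Example (a sketch mirroring the hash construction above; the entity
        values are hypothetical)::

            import hashlib

            ordinal = hashlib.sha224(bytearray(
                '%s%s%s%s' % ('Person', 12345, '2020-01-01T00:00:00Z', 556),
                'utf-8')).hexdigest()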
        """

        for v in changes:
            v['_ordinal'] = hashlib.sha224(
                bytearray(
                    "%s%s%s%s" % (v['entity_type'], v['id'],
                                  v['sequence_ordinal'], self.org_id),
                    'utf-8')).hexdigest()

            v['_status'] = 'ready'  # ready -> running -> finished
            v['_org_id'] = self.org_id
            v['_realm'] = NIF_REALM

            r = requests.post(self.api_integration_url,
                              data=json.dumps(v, cls=EveJSONEncoder),
                              headers=API_HEADERS)

            if r.status_code == 201:
                self.log.debug(
                    'Created change message for {0} with id {1}'.format(
                        v['entity_type'], v['id']))
                self.messages += 1

            elif r.status_code == 422:
                self.log.debug('422 {0} with id {1} already exists'.format(
                    v['entity_type'], v['id']))
            else:
                self.log.error(
                    '{0} - Could not create change message for {1} with id {2}'
                    .format(r.status_code, v['entity_type'], v['id']))
                self.log.error(r.text)

    def _get_change_messages(self, start_date, end_date, resource) -> None:
        """Use NIF GetChanges3"""

        # To avoid future date?
        time.sleep(NIF_SYNC_DELAY)

        if resource == 'changes':
            status, changes = self.nif.get_changes(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'competence':
            status, changes = self.nif.get_changes_competence(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'license':
            status, changes = self.nif.get_changes_license(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        elif resource == 'federation':
            status, changes = self.nif.get_changes_federation(
                start_date.astimezone(self.tz_local),
                end_date.astimezone(self.tz_local))
        else:
            raise Exception('Unknown resource: {}'.format(resource))

        if status is True:

            self.log.debug('Got {} changes for {}'.format(
                len(changes), resource))
            if len(changes) > 0:
                self._update_changes(changes)
            else:
                self.log.debug('Empty change messages list')
            """    
            try:
                self.log.debug('Got {} changes for {}'.format(len(changes), resource))
                if len(changes) > 0:
                    self._update_changes(changes)
            except TypeError:
                self.log.debug('Empty change messages list (_get_changes3)')
            except Exception as e:
                self.log.exception('Unknown exception (_get_changes3)')
            """

        else:
            self.log.error('GetChanges returned error: {0} - {1}'.format(
                changes.get('code', 0), changes.get('error', 'Unknown error')))
            raise Exception('_get_change_messages returned an error')

    def _get_changes(self, start_date, end_date) -> None:
        """Get change messages based on :py:attr:`.sync_type`"""

        self.from_to = [start_date, end_date]  # Adding extra info

        try:
            self._get_change_messages(start_date, end_date, self.sync_type)

            if self.sync_errors > 0:
                self.sync_errors -= 1

            return True

        except requests.exceptions.ConnectionError:
            self.sync_errors += 1
            self.log.error('Connection error in _get_changes')

            # Retry with linear backoff @TODO see if retry should be in populate and sync
            if NIF_SYNC_MAX_ERRORS >= self.sync_errors:
                time.sleep(3 * self.sync_errors)
                return self._get_changes(start_date, end_date)
        except TypeError:
            self.log.debug('TypeError: Empty change messages list ({})'.format(
                self.sync_type))
        except Exception as e:
            self.sync_errors += 1
            self.log.exception('Exception in _get_changes')
            # @TODO Need to verify if this is reason to warn somehow??

        return False

    def sync(self) -> None:
        """This method is the job run by the scheduler when last change message is < NIF_POPULATE_INTERVAL.

        If a job misfires, then on next run the interval to sync will be twice.

        .. note::
            Checks if :py:attr:`.sync_errors` > :py:attr:`.sync_errors_max` and if so it will set :py:attr:`._stopper`
            for this thread and will run :py:meth:`._stopper` as it always checks, which in turn calls
            :py:meth:`._shutdown` and terminates the thread.
        """

        self.state.set_state(mode='sync', state='running')

        # Check if stopper is set
        self._stopper()

        self.log.debug('Getting sync messages')

        if self.initial_start is not None:
            end = datetime.utcnow().replace(tzinfo=self.tz_utc)
            start = self.initial_start + timedelta(
                seconds=self.initial_timedelta)

            self.log.debug('From:   {0}'.format(
                start.astimezone(self.tz_local).isoformat()))
            self.log.debug('To:     {0}'.format(
                end.astimezone(self.tz_local).isoformat()))

            if end > start:
                if self._get_changes(start, end):
                    self.initial_start = end
            else:
                self.log.error('Inconsistent dates: end is not after start')
        else:
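            # No stored window (first run): start sync_interval minutes back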
            end = datetime.utcnow().replace(tzinfo=self.tz_utc)
            self.initial_start = end - timedelta(minutes=self.sync_interval)

            self.log.debug('From:   {0}'.format(
                self.initial_start.astimezone(self.tz_local).isoformat()))
            self.log.debug('To:     {0}'.format(
                end.astimezone(self.tz_local).isoformat()))

            if end > self.initial_start:
                if self._get_changes(self.initial_start, end):
                    self.initial_start = end
            else:
                self.log.error('Inconsistent dates: end is not after start')

        self.state.set_state(mode='sync', state='sleeping')

    def populate(self, sync_after=True) -> None:
        """Populates change messages from :py:attr:`.org_created` until last change message timedelta is less than
        :py:attr:`.populate_interval` from which it will exit and start :py:attr:`scheduler`.

        .. attention::
            :py:meth:`populate` requires a slot in the connectionpool. Getting a slot requires acquiring
            :py:attr:`lock`. Number of slots available is set in :py:mod:`syncdaemon` on startup.
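
        Example (sketch of how :py:mod:`syncdaemon` is assumed to provide the shared pool)::

            import threading

            pool = threading.BoundedSemaphore(value=10)  # slot count is hypothetical
            # each sync worker is constructed with lock=pool and waits here
            # for a free slot before fetching changes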
        """
        self.state.set_state(mode='populate', state='initializing')
        self.log.debug('Populate, interval of {0} hours...'.format(
            self.populate_interval))

        # Initial
        if self.initial_start is None:
            end_date = self.org_created
        else:
            end_date = self.initial_start

        start_date = end_date - timedelta(hours=self.populate_interval)

        # Populate loop: fetch changes in populate_interval-hour windows until
        # the window reaches the present
        while end_date < datetime.utcnow().replace(
                tzinfo=self.tz_utc) + timedelta(hours=self.populate_interval):

            # Check stopper
            self._stopper()

            # Acquire lock and run!
            self.log.debug('Waiting for slot in connectionpool...')
            self.state.set_state(state='waiting', reason='connection pool')

            with self.lock:
                self.state.set_state(state='running')
                # Check stopper, might have waited long time
                self._stopper()

                # End date reaches the present: clamp it and do the final fetch
                if end_date > datetime.utcnow().replace(tzinfo=self.tz_utc):
                    end_date = datetime.utcnow().replace(tzinfo=self.tz_utc)

                    self.log.debug(
                        'Getting last changes between {0} and {1}'.format(
                            start_date.astimezone(self.tz_local).isoformat(),
                            end_date.astimezone(self.tz_local).isoformat()))

                    if self._get_changes(start_date, end_date) is True:
                        # Last populate
                        break  # Break while

                else:
                    self.log.debug(
                        'Getting changes between {0} and {1}'.format(
                            start_date.astimezone(self.tz_local).isoformat(),
                            end_date.astimezone(self.tz_local).isoformat()))

                    if self._get_changes(start_date, end_date) is True:
                        # Next iteration
                        start_date = end_date
                        end_date = end_date + timedelta(
                            hours=self.populate_interval)

                    time.sleep(0.1)  # Grace period before releasing the lock

        # The final iteration breaks before start_date is advanced, so resume syncing from the last start_date
        self.initial_start = start_date
        self.state.set_state(mode='populate',
                             state='finished',
                             reason='ended populate')

        if sync_after is True:
            self.log.debug('Starting sync scheduler...')
            self.scheduler.start()
            self.state.set_state(mode='sync',
                                 state='started',
                                 reason='starting after populate')