Exemplo n.º 1
0
class RunPages:
    """Transfers Pure 'research-outputs' records to RDM one page at a time."""

    def __init__(self):
        self.report = Reports()
        self.rdm_add_record = RdmAddRecord()

    def get_pure_by_page(self, page_begin: int, page_end: int, page_size: int):
        """Get records from Pure 'research-outputs' endpoint by page and submit them to RDM.

        Pages ``page_begin`` up to, but not including, ``page_end`` are
        processed, each one requested with ``page_size`` records.
        ``self.global_counters`` is re-initialized for every page and a
        summary is reported at the end of every page.
        """
        for page in range(page_begin, page_end):

            # Counters are per page: the summary below reports one page only
            self.global_counters = initialize_counters()

            # Report intro
            self.report.add_template(['console'], ['general', 'title'], ['PAGES'])
            self.report.add_template(['console'], ['pages', 'page_and_size'], [page, page_size])

            # Pure get request
            response = get_pure_metadata('research-outputs', '', {'page': page, 'pageSize': page_size})

            if response.status_code >= 300:
                # A failed request carries no usable 'items': report the
                # response body instead of trying to parse it
                self.report.add(response.content)
            else:
                # Load json response
                resp_json = json.loads(response.content)

                # Creates data to push to RDM
                for item in resp_json['items']:
                    self.report.add('')          # adds new line in the console
                    self.rdm_add_record.create_invenio_data(self.global_counters, item)

            self.report_summary(page, page_size)

    def report_summary(self, pag, page_size):
        """Report the global counters of page ``pag`` on the console and in the pages summary."""
        # Global counters
        self.report.summary_global_counters(['console'], self.global_counters)
        # Summary pages.log
        self.report.pages_single_line(self.global_counters, pag, page_size)
Exemplo n.º 2
0
class PureChangesByDate:
    def __init__(self):
        """Instantiate the helper objects used by the other methods of this class."""
        # Pushes records to RDM (see push_record_by_uuid in _update_records)
        self.add_record = RdmAddRecord()
        # Receives every report line / template produced by this class
        self.report = Reports()
        # Deletes RDM records (see delete.record in _delete_records)
        self.delete = Delete()
        # Looks up the RDM recid of a Pure uuid (see get_recid in _delete_records)
        self.general_functions = GeneralFunctions()

    def get_pure_changes(self):
        """ Gets from Pure 'changes' endpoint all records that have been created / updated / deleted 
            and modifies accordingly the relative RDM records """

        # Get date of last update
        missing_updates = self._get_missing_updates()
        missing_updates = ['2020-05-15']  # TEMPORARY !!!!!

        if missing_updates == []:
            self.report.add('\nNothing to update.\n')
            return

        for date_to_update in reversed(missing_updates):
            self._changes_by_date(date_to_update)
        return

    def _set_counters_and_title(func):
        def _wrapper(self, changes_date: str):

            # Initialize global counters
            self.global_counters = initialize_counters()

            self.report_files = ['console', 'changes']

            self.report.add_template(self.report_files, ['general', 'title'],
                                     ['CHANGES'])
            self.report.add(f'\nProcessed date: {changes_date}',
                            self.report_files)

            # Decorated function
            func(self, changes_date)

            self._report_summary()

        return _wrapper

    @_set_counters_and_title
    def _changes_by_date(self, changes_date: str):
        """Get from Pure all changes that took place on ``changes_date`` and apply them to RDM.

        The Pure 'changes' endpoint is read page by page; for every page the
        deletions are processed first, then the creations / updates.

        Returns False when a Pure request fails (status code >= 300),
        True otherwise.
        """
        reference = changes_date
        page = 1

        # Per-date state, set up once before the first page so that the
        # summary reported after this method covers all pages of the date.
        # duplicated_uuid is used to check if there are doubled tasks
        # (e.g. update uuid and delete same uuid)
        self.duplicated_uuid = []
        self._initialize_local_counters()

        while reference:
            # Get from pure all changes of a certain date
            response = get_pure_metadata('changes', reference, {})

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            # Check if there are records in the response from pure
            json_response = self._records_to_process(response, page,
                                                     changes_date)

            # If there are no records to process
            if not json_response:
                return True

            # Iterates over all records that need to be deleted
            self._delete_records(json_response)

            # Create / Add / Update
            self._update_records(json_response)

            # Gets the reference code of the next page.
            # NOTE(review): get_next_page is assumed to return a falsy value
            # when there is no next page (it is used that way elsewhere in
            # this file) - confirm; calling .split on it would then fail.
            next_link = get_next_page(json_response)
            if not next_link:
                break
            reference = next_link.split('/')[-1]

            page += 1

        return True

    def _records_to_process(self, response: object, page: int,
                            changes_date: str):
        """Check if there are records in the response from Pure.

        Returns the decoded json response when it announces at least one
        record, False otherwise. When the count is zero ``changes_date`` is
        appended to the 'successful_changes' data file, and on the first page
        a "nothing to transfer" notice is reported as well.
        """
        # Load response json
        json_response = json.loads(response.content)

        number_records = json_response["count"]

        if number_records == 0:
            # Adds the date to successful_changes.txt; the context manager
            # guarantees the file is flushed and closed
            with open(data_files_name['successful_changes'], "a") as file:
                file.write(f'{changes_date}\n')

            if page == 1:
                # If there are no changes at all
                self.report.add('\n\nNothing to transfer.\n\n',
                                self.report_files)
            return False

        report_line = (f'\nPag{add_spaces(page)} @ Pure get changes @ {response} '
                       f'@ Number of items: {add_spaces(number_records)}')
        self.report.add(report_line, self.report_files)

        return json_response

    def _delete_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be deleted """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                continue
            elif item['familySystemName'] != 'ResearchOutput':
                continue
            elif item['changeType'] != 'DELETE':
                continue

            uuid = item['uuid']
            self.duplicated_uuid.append(uuid)
            self.local_counters['delete'] += 1

            report = f"\n{self.local_counters['delete']} @ {item['changeType']}"
            self.report.add(report)

            # Gets the record recid
            recid = self.general_functions.get_recid(uuid,
                                                     self.global_counters)

            if recid:
                # Deletes the record from RDM
                self.delete.record(recid)
            else:
                # The record is not in RDM
                self.global_counters['delete']['success'] += 1
        return True

    def _update_records(self, json_response: dict):
        """ Iterates over the Pure response and process all records that need to be created/updated """

        for item in json_response['items']:

            if 'changeType' not in item or 'uuid' not in item:
                self.local_counters['incomplete'] += 1
                continue
            elif item['familySystemName'] != 'ResearchOutput':
                self.local_counters['not_ResearchOutput'] += 1
                continue
            elif item['changeType'] == 'DELETE':
                continue

            uuid = item['uuid']
            if uuid in self.duplicated_uuid:
                self.local_counters['duplicated'] += 1
                continue

            record_number = add_spaces(self.global_counters['total'] + 1)
            report = f"\n{record_number} - Change type           - {item['changeType']}"
            self.report.add(report)

            if item['changeType'] == 'ADD' or item['changeType'] == 'CREATE':
                self.local_counters['create'] += 1

            if item['changeType'] == 'UPDATE':
                self.local_counters['update'] += 1

            # Checks if this uuid has already been created / updated / deleted
            self.duplicated_uuid.append(uuid)

            # Adds record to RDM
            self.add_record.push_record_by_uuid(self.global_counters, uuid)

    def _get_missing_updates(self, days_span: int = 7):
        """Return the recent dates whose Pure changes have not been processed.

        Reads the 'successful_changes' data file and checks, starting from
        today and going back one day at a time for ``days_span`` days, which
        dates (formatted YYYY-MM-DD) do not appear in it.

        Returns a list of date strings, today first.
        """
        file_name = data_files_name['successful_changes']
        check_if_file_exists(file_name)

        # Read the file once (it does not change while checking) and close it
        with open(file_name, 'r') as file:
            processed_dates = file.read()

        today = datetime.today().date()

        # str(date) gives the YYYY-MM-DD form that is looked up in the file
        candidates = (str(today - timedelta(days=offset))
                      for offset in range(days_span))

        return [day for day in candidates if day not in processed_dates]

    def _report_summary(self):
        """Report the global counters, then the local counters through the 'changes' summary template."""
        # Global counters
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

        # One formatted value per local counter, in the counters' own order
        arguments = [
            add_spaces(value) for value in self.local_counters.values()
        ]
        self.report.add_template(self.report_files, ['changes', 'summary'],
                                 arguments)

    def _initialize_local_counters(self):

        # Incomplete:  when the uuid or changeType are not specified
        # Duplicated:  e.g. when a record has been modified twice in a day
        # Irrelevant:  when familySystemName is not ResearchOutput

        self.local_counters = {
            'delete': 0,
            'update': 0,
            'create': 0,
            'incomplete': 0,
            'duplicated': 0,
            'not_ResearchOutput': 0,
        }
Exemplo n.º 3
0
class RdmOwners:
    def __init__(self):
        """Instantiate the helper objects and set the report destinations used by this class."""
        # RDM requests (see get_metadata_by_recid / get_metadata below)
        self.rdm_requests = Requests()
        # RDM database access (see select_query in _get_user_id_from_rdm)
        self.rdm_db = RdmDatabase()
        # Receives every report line / template produced by this class
        self.report = Reports()
        # Creates record metadata and pushes it to RDM (see create_invenio_data)
        self.rdm_add_record = RdmAddRecord()
        # recid lookup and record update (see get_recid / update_rdm_record)
        self.general_functions = GeneralFunctions()
        # Report destinations passed to self.report by most methods
        self.report_files = ['console', 'owners']

    def _set_counters_and_title(func):
        def _wrapper(self, identifier):

            self.report.add_template(['console'], ['general', 'title'],
                                     ['OWNERS CHECK'])
            self.global_counters = initialize_counters()

            # Decorated function
            func(self, identifier)

        return _wrapper

    @_set_counters_and_title
    def run_owners(self, identifier: str):
        """Get from Pure all the records related to a certain user and align RDM with them.

        ``identifier`` names the Pure field used to find the user ('orcid' or
        'externalId'). Records missing from RDM are created; for records
        already in RDM the logged in user is added as owner when needed.

        Returns False when the task stops early (no single logged in user, no
        Pure user uuid, failed Pure request), True when the user has no
        records, None after a complete run.

        NOTE(review): the identifier value is still hard-coded below
        (marked TEMPORARY) - it should come from the caller.
        """
        identifier_value = '0000-0002-4154-6945'  # TEMPORARY
        if identifier == 'externalId':  # TEMPORARY
            identifier_value = '30'  # TEMPORARY

        self.report.add(f'\n{identifier}: {identifier_value}\n')

        # Gets the ID and IP of the logged in user
        self.user_id = self._get_user_id_from_rdm()
        # If the user was not found in RDM then there is no owner to add to the record.
        if not self.user_id:
            return False

        # Get from pure user_uuid
        self.user_uuid = self._get_user_uuid_from_pure(identifier,
                                                       identifier_value)
        if not self.user_uuid:
            return False

        # Add user to user_ids_match.txt
        if identifier == 'externalId':
            self._add_user_ids_match(identifier_value)

        next_page = True
        page = 1
        self.local_counters = {'create': 0, 'in_record': 0, 'to_update': 0}

        while next_page:

            # Pure request
            params = {'sort': 'modified', 'page': page, 'pageSize': 100}
            response = get_pure_metadata('persons',
                                         f'{self.user_uuid}/research-outputs',
                                         params)
            if response.status_code >= 300:
                # Report the failure, as done for the other Pure requests
                self.report.add(response.content, self.report_files)
                return False

            # Initial response process and json load
            pure_json = self._process_response(response, page)
            # In case the user has no records
            if not pure_json:
                return True

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(pure_json)

            # Iterates over all items in the page
            for item in pure_json['items']:

                uuid = item['uuid']
                title = shorten_file_name(item['title'])

                self.report.add(f"\n\tRecord uuid  @ {uuid} @ {title}")

                # Get from RDM the recid
                recid = self.general_functions.get_recid(
                    uuid, self.global_counters)

                # Record NOT in RDM (any falsy recid), create it
                if not recid:
                    self._create_rdm_record(item)
                    continue

                # Gets record metadata from RDM and checks if the user is already a record owner
                self._process_record_owners(recid)

            page += 1

        self._final_report()

    def _process_record_owners(self, recid):
        """ Gets record metadata from RDM and checks if the user is already a record owner """

        response = self.rdm_requests.get_metadata_by_recid(recid)
        rdm_json = json.loads(response.content)['metadata']

        self.report.add(
            f"\tRDM get metadata @ {response} @ Current owners: @ {rdm_json['owners']}"
        )

        if self.user_id not in rdm_json['owners']:
            # The record is in RDM but the logged in user is not among the recod owners
            self._add_user_as_owner(rdm_json, recid)
        else:
            # The record is in RDM and the user is an owner
            self.report.add('\tRDM record status @@ Owner IN record')
            self.local_counters['in_record'] += 1

    def _add_user_as_owner(self, data, recid):
        """ Adds the current logged in user as record owner """

        data['owners'].append(self.user_id)

        self.report.add(
            f"\tRDM record status @ ADDING owner @ New owners: @ {data['owners']}"
        )

        # Add owner to an existing RDM record
        self.general_functions.update_rdm_record(recid, data)

        self.local_counters['to_update'] += 1

    def _create_rdm_record(self, item: dict):
        """ If a record of the processed user is not in RDM creates it """
        item['owners'] = [self.user_id]

        self.report.add('\tRDM record status @@ CREATE record')
        self.local_counters['create'] += 1

        # Creates record metadata and pushes it to RDM
        self.rdm_add_record.create_invenio_data(self.global_counters, item)

    def _final_report(self):
        # Final report
        create = self.local_counters['create']
        update = self.local_counters['to_update']
        in_rec = self.local_counters['in_record']
        report = f"\nCreate: {create} - To update: {update} - In record: {in_rec}"
        self.report.add(report, self.report_files)
        self.report.summary_global_counters(self.report_files,
                                            self.global_counters)

    def _process_response(self, response: object, page: int):
        """ Checks if there are records to process """

        # Load response json
        resp_json = json.loads(response.content)

        total_items = resp_json['count']

        if page == 1:
            self.report.add(f'Total records: {total_items}')

        if page == 1 and total_items == 0:
            self.report.add('\nThe user has no records @ End task\n')
            return False

        self.report.add(f'\nPag {page} - Get person records    - {response}')
        return resp_json

    def _get_user_uuid_from_pure(self, key_name: str, key_value: str):
        """Search the Pure 'persons' endpoint for the user whose ``key_name`` equals ``key_value``.

        The endpoint is queried with ``key_value`` page by page until a person
        item with a matching ``key_name`` field is found.

        Returns the person's uuid, or False when a request fails, the uuid
        does not have 36 characters, or no matching person is found.
        """
        # If the uuid is not found in the first x items then it will continue with the next page
        page = 1
        page_size = 10
        next_page = True

        while next_page:

            params = {
                'page': page,
                'pageSize': page_size,
                'q': f'"{key_value}"'
            }
            response = get_pure_metadata('persons', '', params)

            if response.status_code >= 300:
                self.report.add(response.content, self.report_files)
                return False

            record_json = json.loads(response.content)

            for item in record_json['items']:

                # .get(): persons without this field are skipped instead of
                # raising KeyError
                if item.get(key_name) != key_value:
                    continue

                first_name = item['name']['firstName']
                last_name = item['name']['lastName']
                uuid = item['uuid']

                self.report.add(
                    f'Name:    {first_name} {last_name}\nUuid:    {uuid}',
                    self.report_files)

                if len(uuid) != 36:
                    self.report.add(
                        '\n- Warning! Incorrect user_uuid length -\n',
                        self.report_files)
                    return False
                return uuid

            # Checks if there is a 'next' page to be processed
            next_page = get_next_page(record_json)

            page += 1

        self.report.add('Uuid NOT FOUND - End task\n', self.report_files)
        return False

    #   ---         ---         ---
    def _get_user_id_from_rdm(self):
        """ Gets the ID and IP of the logged in user """

        table_name = 'accounts_user_session_activity'

        # SQL query
        response = self.rdm_db.select_query('user_id, ip', table_name)

        if not response:
            self.report.add(f'\n- {table_name}: No user is logged in -\n',
                            self.report_files)
            return False

        elif len(response) > 1:
            self.report.add(f'\n- {table_name}: Multiple users logged in \n',
                            self.report_files)
            return False

        self.report.add(
            f'user IP: {response[0][1]}\nUser id: {response[0][0]}',
            self.report_files)

        return response[0][0]

    def _add_user_ids_match(self, external_id: str):
        """Add the user to user_ids_match.txt unless already listed.

        Each line of the file holds: rdm_user_id, user_uuid and
        user_external_id, separated by a space.
        """
        file_name = data_files_name['user_ids_match']

        # NOTE(review): the key 'user_ids_match' (not the path in file_name) is
        # what gets passed on to file_read_lines - confirm that is intended
        needs_to_add = self._check_user_ids_match('user_ids_match',
                                                  external_id)

        if needs_to_add:
            # The context manager guarantees the line is flushed and the file closed
            with open(file_name, 'a') as file:
                file.write(f'{self.user_id} {self.user_uuid} {external_id}\n')
            report = f'user_ids_match @ Adding id toList @ {self.user_id}, {self.user_uuid}, {external_id}'
            self.report.add(report, self.report_files)

    def _check_user_ids_match(self, file_name: str, external_id: str):
        """Tell whether the user's ids still have to be added to the ids file.

        Returns False (and reports it) when a line made of exactly
        ``user_id user_uuid external_id`` is already present, True otherwise.
        Blank or incomplete lines never match and are simply ignored.
        """
        expected = [str(self.user_id), self.user_uuid, external_id]

        for line in file_read_lines(file_name):
            # Drop the line ending, then split into the space separated ids
            fields = line.split('\n')[0].split(' ')

            # Comparing the whole list avoids indexing lines with fewer than
            # three fields
            if fields == expected:
                self.report.add('Ids list:   user in list',
                                self.report_files)
                return False
        return True

    def _initalizing_method(func):
        """Decorator: report the 'RECORDS OWNER' title and empty the record owners file before ``func``."""

        def _wrapper(self):

            title_arguments = (['console'], ['general', 'title'],
                               ['RECORDS OWNER'])
            self.report.add_template(*title_arguments)

            # Truncate the rdm_record_owners data file: opening it for writing
            # and closing it right away leaves it empty
            owners_file = data_files_name['rdm_record_owners']
            with open(owners_file, 'w'):
                pass

            # Decorated function
            func(self)

        return _wrapper

    @_initalizing_method
    def get_rdm_record_owners(self):
        """Get all records from RDM and count how many records belong to each user.

        Every record is reported and appended (uuid - recid - owners) to the
        'rdm_record_owners' data file, one page at a time. At the end the
        records per owner are reported and the content of
        all_rdm_records.txt is replaced with the collected 'uuid recid' lines.
        A failed request (status code >= 300) is reported and stops the paging.
        """
        pag = 1
        pag_size = 250

        count = 0
        count_records_per_owner = {}
        all_records = []
        next_page = True

        while next_page:

            # REQUEST to RDM
            params = {'sort': 'mostrecent', 'size': pag_size, 'page': pag}
            response = self.rdm_requests.get_metadata(params)

            self.report.add(f'\n{response}\n')

            if response.status_code >= 300:
                self.report.add(response.content)
                break

            resp_json = json.loads(response.content)
            page_lines = []

            for item in resp_json['hits']['hits']:
                count += 1

                metadata = item['metadata']
                uuid = metadata['uuid']
                recid = metadata['recid']
                owners = metadata['owners']

                line = f'{uuid} - {recid} - {owners}'
                self.report.add(line)
                page_lines.append(f'{line}\n')

                all_records.append(f'{uuid} {recid}\n')

                for owner in owners:
                    count_records_per_owner[owner] = (
                        count_records_per_owner.get(owner, 0) + 1)

            self.report.add(f'\nPag {pag} - Records {count}\n')

            # Append this page to the owners file; the context manager
            # guarantees the data is flushed and the file closed
            with open(data_files_name['rdm_record_owners'], 'a') as file:
                file.write(''.join(page_lines))

            # A response without a 'next' link is the last page
            next_page = 'next' in resp_json.get('links', {})

            pag += 1

        # Counts how many records have each owner
        self._count_records_per_owner(count_records_per_owner)

        # Update all_rdm_records.txt file
        self._update_all_rdm_records_file(''.join(all_records))

    def _count_records_per_owner(self, count_records_per_owner):
        """Report a two column table: each owner and the number of records it owns."""
        self.report.add('Owner  Records')

        for owner, total in count_records_per_owner.items():
            records_column = add_spaces(total)
            owner_column = add_spaces(owner)
            self.report.add(f'{owner_column}    {records_column}')

    def _update_all_rdm_records_file(self, all_records_list):
        """Replace the content of all_rdm_records.txt with ``all_records_list``."""
        file_all_records_list = data_files_name['all_rdm_records']

        # Mode 'w' empties the file before writing; the context manager
        # guarantees the data is flushed and the file closed
        with open(file_all_records_list, 'w') as file:
            file.write(all_records_list)