Пример #1
0
    def _create_ingest(self, file_name):
        """Builds an unsaved ingest model describing the given file.

        The model is populated with the file name, this Strike's ID, the media type derived from the file name, and
        the monitored workspace. Nothing is written to the database here; saving is left to the caller.

        :param file_name: The name of the file being ingested
        :type file_name: string
        :returns: The new ingest model
        :rtype: :class:`ingest.models.Ingest`
        """

        new_ingest = Ingest()

        # Gather the values that describe where the file came from and what it is
        media_type = get_media_type(file_name)
        workspace = self._monitored_workspace

        new_ingest.file_name = file_name
        new_ingest.strike_id = self.strike_id
        new_ingest.media_type = media_type
        new_ingest.workspace = workspace

        logger.info('New file on %s: %s', new_ingest.workspace.name, file_name)
        return new_ingest
Пример #2
0
    def _process_file(self, file_name, ingest):
        '''Processes the given file in the Strike directory. The file_name
        argument represents a file in the Strike directory to process. If
        file_name is None, then the ingest argument represents an ongoing
        transfer where the file is unexpectedly not in the Strike directory.
        If file_name is not None and ingest is None, then this is a
        new transfer without an ingest record yet. If both arguments are None
        an exception is thrown.

        :param file_name: The name of the file to process (possibly None)
        :type file_name: str
        :param ingest: The ingest model for the file (possibly None)
        :type ingest: :class:`ingest.models.Ingest`
        '''
        if file_name is None and ingest is None:
            raise Exception('Nothing for Strike to process')
        if file_name is None:
            file_name = ingest.file_name
        file_path = os.path.join(self.strike_dir, file_name)
        final_name = self._final_filename(file_name)

        # Create ingest model for new transfer
        if ingest is None:
            msg = 'New file %s has arrived, creating ingest for %s'
            logger.info(msg, file_path, final_name)
            ingest = Ingest()
            # Ingest model should record the actual name of the file (no
            # temporary suffix)
            ingest.file_name = final_name
            ingest.strike_id = self.strike_id
            # TODO: investigate better way to get start time of transfer
            last_access = os.path.getatime(file_path)
            ingest.transfer_path = os.path.join(self.strike_dir, final_name)
            ingest.transfer_started = datetime.utcfromtimestamp(last_access)

        if ingest.status == 'TRANSFERRING':
            # Update bytes transferred
            size = os.path.getsize(file_path)
            ingest.bytes_transferred = size

            # Ensure that file is still in Strike dir as expected
            if not os.path.exists(file_path):
                msg = '%s was being transferred, but the file is now lost'
                logger.error(msg, file_path)
                ingest.status = 'ERRORED'
                ingest.save()
                logger.info('Ingest for %s marked as ERRORED', final_name)
                return

            if self._is_still_transferring(file_name):
                # Update with current progress of the transfer
                ingest.save()
                logger.info('%s is still transferring, progress updated', file_path)
            else:
                # Transfer is complete, will move on to next section
                self._complete_transfer(ingest, size)

        if ingest.status == 'TRANSFERRED':
            if ingest.ingest_path:
                self._prepare_file_for_ingest(ingest)
            else:
                self._defer_file(ingest)
        elif not ingest.status == 'TRANSFERRING':
            msg = 'Strike not expecting to process file with status %s'
            raise Exception(msg, ingest.status)
Пример #3
0
    def _process_file(self, file_name, ingest):
        '''Processes the given file in the Strike directory. The file_name
        argument represents a file in the Strike directory to process. If
        file_name is None, then the ingest argument represents an ongoing
        transfer where the file is unexpectedly not in the Strike directory.
        If file_name is not None and ingest is None, then this is a
        new transfer without an ingest record yet. If both arguments are None
        an exception is thrown.

        :param file_name: The name of the file to process (possibly None)
        :type file_name: str
        :param ingest: The ingest model for the file (possibly None)
        :type ingest: :class:`ingest.models.Ingest`
        '''
        if file_name is None and ingest is None:
            raise Exception('Nothing for Strike to process')
        if file_name is None:
            file_name = ingest.file_name
        file_path = os.path.join(self.strike_dir, file_name)
        final_name = self._final_filename(file_name)

        # Create ingest model for new transfer
        if ingest is None:
            msg = 'New file %s has arrived, creating ingest for %s'
            logger.info(msg, file_path, final_name)
            ingest = Ingest()
            # Ingest model should record the actual name of the file (no
            # temporary suffix)
            ingest.file_name = final_name
            ingest.strike_id = self.strike_id
            # TODO: investigate better way to get start time of transfer
            last_access = os.path.getatime(file_path)
            ingest.transfer_path = os.path.join(self.strike_dir, final_name)
            ingest.transfer_started = datetime.utcfromtimestamp(last_access)

        if ingest.status == 'TRANSFERRING':
            # Update bytes transferred
            size = os.path.getsize(file_path)
            ingest.bytes_transferred = size

            # Ensure that file is still in Strike dir as expected
            if not os.path.exists(file_path):
                msg = '%s was being transferred, but the file is now lost'
                logger.error(msg, file_path)
                ingest.status = 'ERRORED'
                ingest.save()
                logger.info('Ingest for %s marked as ERRORED', final_name)
                return

            if self._is_still_transferring(file_name):
                # Update with current progress of the transfer
                ingest.save()
                logger.info('%s is still transferring, progress updated', file_path)
            else:
                # Transfer is complete, will move on to next section
                self._complete_transfer(ingest, size)

        if ingest.status == 'TRANSFERRED':
            if ingest.ingest_path:
                self._prepare_file_for_ingest(ingest)
            else:
                self._defer_file(ingest)
        elif not ingest.status == 'TRANSFERRING':
            msg = 'Strike not expecting to process file with status %s'
            raise Exception(msg, ingest.status)