def _create_ingest(self, file_name):
    """Builds an unsaved ingest model for a file seen on the monitored workspace.

    The model is only populated here; persisting it to the database is left to the caller.

    :param file_name: The name of the file being ingested
    :type file_name: string
    :returns: The new ingest model
    :rtype: :class:`ingest.models.Ingest`
    """

    new_ingest = Ingest()

    # Identity of the file and of the Strike process that picked it up
    new_ingest.file_name = file_name
    new_ingest.strike_id = self.strike_id

    # Media type is derived from the file name alone
    new_ingest.media_type = get_media_type(file_name)
    new_ingest.workspace = self._monitored_workspace

    workspace_name = new_ingest.workspace.name
    logger.info('New file on %s: %s', workspace_name, file_name)

    return new_ingest
def _process_file(self, file_name, ingest):
    """Processes the given file in the Strike directory.

    The file_name argument represents a file in the Strike directory to process. If file_name is None, then the ingest
    argument represents an ongoing transfer where the file is unexpectedly not in the Strike directory. If file_name is
    not None and ingest is None, then this is a new transfer without an ingest record yet. If both arguments are None an
    exception is thrown.

    :param file_name: The name of the file to process (possibly None)
    :type file_name: str
    :param ingest: The ingest model for the file (possibly None)
    :type ingest: :class:`ingest.models.Ingest`
    :raises Exception: If both arguments are None, or if the ingest has a status that Strike does not handle here
    """

    if file_name is None and ingest is None:
        raise Exception('Nothing for Strike to process')
    if file_name is None:
        # Only the ingest record is known, so fall back to the name it recorded
        file_name = ingest.file_name
    file_path = os.path.join(self.strike_dir, file_name)
    final_name = self._final_filename(file_name)

    # Create ingest model for new transfer
    if ingest is None:
        msg = 'New file %s has arrived, creating ingest for %s'
        logger.info(msg, file_path, final_name)
        ingest = Ingest()
        # Ingest model should record the actual name of the file (no temporary suffix)
        ingest.file_name = final_name
        ingest.strike_id = self.strike_id
        # TODO: investigate better way to get start time of transfer
        last_access = os.path.getatime(file_path)
        ingest.transfer_path = os.path.join(self.strike_dir, final_name)
        ingest.transfer_started = datetime.utcfromtimestamp(last_access)

    if ingest.status == 'TRANSFERRING':
        # Reading the size doubles as the check that the file is still in the Strike dir: os.path.getsize() raises
        # OSError when the file cannot be stat'ed, so the lost-file handling must live in the except clause rather
        # than after the call (where it could never be reached)
        try:
            size = os.path.getsize(file_path)
        except OSError:
            msg = '%s was being transferred, but the file is now lost'
            logger.error(msg, file_path)
            ingest.status = 'ERRORED'
            ingest.save()
            logger.info('Ingest for %s marked as ERRORED', final_name)
            return

        # Update bytes transferred
        ingest.bytes_transferred = size

        if self._is_still_transferring(file_name):
            # Update with current progress of the transfer
            ingest.save()
            logger.info('%s is still transferring, progress updated', file_path)
        else:
            # Transfer is complete, will move on to next section
            # NOTE(review): presumably _complete_transfer() moves the status to TRANSFERRED - confirm
            self._complete_transfer(ingest, size)

    if ingest.status == 'TRANSFERRED':
        if ingest.ingest_path:
            self._prepare_file_for_ingest(ingest)
        else:
            self._defer_file(ingest)
    elif ingest.status != 'TRANSFERRING':
        # A transfer that is still in progress is fine; any other status is unexpected at this point
        msg = 'Strike not expecting to process file with status %s'
        raise Exception(msg % ingest.status)