def _archive_clip(self, file_path, samples, start_index): station = self._recording.station # Get clip start time as a `datetime`. start_seconds = start_index / self._sample_rate start_delta = datetime.timedelta(seconds=start_seconds) start_time = self._recording.start_time + start_delta # Get clip length in sample frames. length = len(samples) end_time = signal_utils.get_end_time( start_time, length, self._sample_rate) creation_time = time_utils.get_utc_now() try: with archive_lock.atomic(): with transaction.atomic(): clip = Clip.objects.create( station=station, mic_output=self._mic_output, recording_channel=self._recording_channel, start_index=start_index, length=length, sample_rate=self._sample_rate, start_time=start_time, end_time=end_time, date=station.get_night(start_time), creation_time=creation_time, creating_user=None, creating_job=self._job, creating_processor=self._detector ) # We must create the clip audio file after creating # the clip row in the database. The file's path # depends on the clip ID, which is set as part of # creating the clip row. # # We create the audio file within the database # transaction to ensure that the clip row and # audio file are created atomically. if self._create_clip_files: self._clip_manager.create_audio_file(clip, samples) except Exception as e: self._logger.error(( 'Attempt to create clip from file "{}" failed with message: ' '{}. File will be ignored.').format( file_path, str(e))) else: self._logger.info('Archived {} clip {}.'.format(self.name, clip))
def _import_recordings(self, recordings):

    """
    Create archive database rows for the specified recordings.

    For each recording this creates a `Recording` row, one
    `RecordingChannel` row per channel, and one `RecordingFile` row
    per constituent file, and stores the new `Recording` model on the
    recording object as its `model` attribute.
    """

    for rec in recordings:

        rec_end_time = signal_utils.get_end_time(
            rec.start_time, rec.length, rec.sample_rate)

        now = time_utils.get_utc_now()

        recording = Recording(
            station=rec.station,
            recorder=rec.recorder,
            num_channels=rec.num_channels,
            length=rec.length,
            sample_rate=rec.sample_rate,
            start_time=rec.start_time,
            end_time=rec_end_time,
            creation_time=now,
            creating_job=self._job)
        recording.save()

        rec.model = recording

        # Create one channel row per recording channel, pairing each
        # channel number with its recorder channel number and mic output.
        channel_data = zip(rec.recorder_channel_nums, rec.mic_outputs)
        for channel_num, (recorder_channel_num, mic_output) in \
                enumerate(channel_data):

            RecordingChannel(
                recording=recording,
                channel_num=channel_num,
                recorder_channel_num=recorder_channel_num,
                mic_output=mic_output
            ).save()

        # Create one file row per recording file, tracking each file's
        # start offset within the recording in sample frames.
        start_index = 0
        for file_num, f in enumerate(rec.files):

            # We store all paths in the archive database as POSIX
            # paths, even on Windows, for portability, since Python's
            # `pathlib` module recognizes the slash as a path separator
            # on all platforms, but not the backslash.
            posix_path = f.path.as_posix()

            RecordingFile(
                recording=recording,
                file_num=file_num,
                start_index=start_index,
                length=f.length,
                path=posix_path
            ).save()

            start_index += f.length
def _archive_clip(self, file_path, samples, start_index): station = self._recording.station # Get clip start time as a `datetime`. start_seconds = start_index / self._sample_rate start_delta = datetime.timedelta(seconds=start_seconds) start_time = self._recording.start_time + start_delta # Get clip length in sample frames. length = len(samples) end_time = signal_utils.get_end_time(start_time, length, self._sample_rate) creation_time = time_utils.get_utc_now() try: with archive_lock.atomic(): with transaction.atomic(): clip = Clip.objects.create( station=station, mic_output=self._mic_output, recording_channel=self._recording_channel, start_index=start_index, length=length, sample_rate=self._sample_rate, start_time=start_time, end_time=end_time, date=station.get_night(start_time), creation_time=creation_time, creating_user=None, creating_job=self._job, creating_processor=self._detector) # We must create the clip audio file after creating # the clip row in the database. The file's path # depends on the clip ID, which is set as part of # creating the clip row. # # We create the audio file within the database # transaction to ensure that the clip row and # audio file are created atomically. if self._create_clip_files: self._clip_manager.create_audio_file(clip, samples) except Exception as e: self._logger.error( ('Attempt to create clip from file "{}" failed with message: ' '{}. File will be ignored.').format(file_path, str(e))) else: self._logger.info('Archived {} clip {}.'.format(self.name, clip))
def _get_recorder(file):

    """
    Return the unique audio recorder device active at `file`'s station
    during the file's time span.

    Raises `CommandExecutionError` if zero or more than one recorder
    matches.
    """

    end_time = signal_utils.get_end_time(
        file.start_time, file.length, file.sample_rate)

    station_recorders = file.station.get_station_devices(
        'Audio Recorder', file.start_time, end_time)

    recorder_count = len(station_recorders)

    # The common case: exactly one recorder was active.
    if recorder_count == 1:
        return station_recorders[0].device

    if recorder_count == 0:
        raise CommandExecutionError(
            f'Could not find recorder for recording file "{file.path}".')

    raise CommandExecutionError(
        f'Found more than one possible recorder for file "{file.path}".')
def _import_clip(self, file_path, info):

    """
    Import the clip audio file at `file_path` into the archive.

    `info` carries the clip's metadata (station, date, start time,
    detector, and optional classification). The method creates a
    `Clip` database row, copies the audio file into the archive, and,
    if a classification is present, annotates the clip with it.
    """

    length, sample_rate = _get_audio_file_info(file_path)

    start_time = info.start_time
    end_time = signal_utils.get_end_time(start_time, length, sample_rate)

    mic_output = self._get_mic_output(info.station.name)
    recording_channel = self._get_recording_channel(
        info.station, info.date, sample_rate)

    # Sanity check: the clip's time span must lie within its recording.
    recording = recording_channel.recording
    _assert_recording_contains_clip(recording, start_time, end_time)

    creation_time = time_utils.get_utc_now()

    # `start_index` is `None` since we do not know the clip's exact
    # offset within the recording, only its purported start time.
    clip = Clip.objects.create(
        station=info.station,
        mic_output=mic_output,
        recording_channel=recording_channel,
        start_index=None,
        length=length,
        sample_rate=sample_rate,
        start_time=start_time,
        end_time=end_time,
        date=info.date,
        creation_time=creation_time,
        creating_job=self._job,
        creating_processor=info.detector)

    _copy_clip_audio_file(file_path, clip)

    if info.classification is not None:

        creation_time = time_utils.get_utc_now()

        # We assume that any classification performed before the
        # import was by the user who started the import.
        creating_user = self._job.creating_user

        model_utils.annotate_clip(
            clip, self._annotation_info, info.classification,
            creation_time=creation_time, creating_user=creating_user)
def _import_clip(self, file_path, info):

    """
    Import one clip audio file into the archive.

    Creates a `Clip` row from the audio file at `file_path` and the
    metadata in `info`, copies the file into the archive, and adds a
    classification annotation when `info.classification` is present.
    """

    length, sample_rate = _get_audio_file_info(file_path)

    start_time = info.start_time
    end_time = signal_utils.get_end_time(start_time, length, sample_rate)

    mic_output = self._get_mic_output(info.station.name)
    recording_channel = self._get_recording_channel(
        info.station, info.date, sample_rate)

    # Verify that the clip's span falls within its parent recording
    # before creating any database rows.
    recording = recording_channel.recording
    _assert_recording_contains_clip(recording, start_time, end_time)

    creation_time = time_utils.get_utc_now()

    # `start_index=None` records that the clip's exact sample offset
    # within the recording is unknown.
    clip = Clip.objects.create(
        station=info.station,
        mic_output=mic_output,
        recording_channel=recording_channel,
        start_index=None,
        length=length,
        sample_rate=sample_rate,
        start_time=start_time,
        end_time=end_time,
        date=info.date,
        creation_time=creation_time,
        creating_job=self._job,
        creating_processor=info.detector)

    _copy_clip_audio_file(file_path, clip)

    if info.classification is not None:

        creation_time = time_utils.get_utc_now()

        # We assume that any classification performed before the
        # import was by the user who started the import.
        creating_user = self._job.creating_user

        model_utils.annotate_clip(
            clip, self._annotation_info, info.classification,
            creation_time=creation_time, creating_user=creating_user)
def _get_recorder(file):

    """
    Return the audio recorder device that was active at `file`'s
    station over the file's time span.

    Raises `CommandExecutionError` when no recorder or more than one
    recorder matches.
    """

    end_time = signal_utils.get_end_time(
        file.start_time, file.length, file.sample_rate)

    station_recorders = file.station.get_station_devices(
        'Audio Recorder', file.start_time, end_time)

    num_recorders = len(station_recorders)

    if num_recorders == 1:
        # Exactly one match: this is the recorder we want.
        return station_recorders[0].device

    elif num_recorders == 0:
        raise CommandExecutionError(
            'Could not find recorder for recording file "{}".'.format(
                file.path))

    else:
        raise CommandExecutionError(
            'Found more than one possible recorder for file "{}".'.format(
                file.path))
def _create_clip(self, clip_info):

    """
    Create a `Clip` database row (and its annotations) from a deferred
    clip description.

    `clip_info` is a sequence of
    (recording channel ID, start index, length, creation time,
    creating job ID, creating processor ID, annotations), where
    `annotations` is a name-to-value mapping or `None`.
    """

    (recording_channel_id, start_index, length, creation_time,
     creating_job_id, creating_processor_id, annotations) = clip_info

    channel, station, mic_output, sample_rate, start_time = \
        self._get_recording_channel_info(recording_channel_id)

    # Offset the recording's start time by the clip's start index to
    # get the clip's start time.
    start_offset = signal_utils.get_duration(start_index, sample_rate)
    start_time += datetime.timedelta(seconds=start_offset)

    end_time = signal_utils.get_end_time(start_time, length, sample_rate)

    job = self._get_job(creating_job_id)
    processor = self._get_processor(creating_processor_id)

    clip = Clip.objects.create(
        station=station,
        mic_output=mic_output,
        recording_channel=channel,
        start_index=start_index,
        length=length,
        sample_rate=sample_rate,
        start_time=start_time,
        end_time=end_time,
        date=station.get_night(start_time),
        creation_time=creation_time,
        creating_user=None,
        creating_job=job,
        creating_processor=processor
    )

    if annotations is not None:

        for name, value in annotations.items():

            annotation_info = self._get_annotation_info(name)

            # NOTE(review): the clip row above uses `job` (looked up
            # from `creating_job_id`), but the annotation uses
            # `self._job` — confirm this difference is intentional.
            model_utils.annotate_clip(
                clip, annotation_info, str(value),
                creation_time=creation_time, creating_user=None,
                creating_job=self._job, creating_processor=processor)
def _create_recording_channel(self, station, date, sample_rate):

    """
    Create a `Recording` and a single `RecordingChannel` for the
    specified station, night date, and sample rate, and return the
    new recording channel.

    The recording's length is computed from the station's recording
    start and end times for the night and the given sample rate.
    """

    station_name = station.name
    recorder = self._get_recorder(station_name)
    mic_output = self._get_mic_output(station_name)

    start_time, end_time = \
        _get_recording_start_and_end_times(station, date)

    duration = (end_time - start_time).total_seconds()
    length = int(round(duration * sample_rate))

    # Recompute end time from recording start time, length, and
    # sample rate so it is consistent with our definition (i.e. so
    # that it is the time of the last sample of the recording rather
    # than the time of the sample after that).
    end_time = signal_utils.get_end_time(start_time, length, sample_rate)

    creation_time = time_utils.get_utc_now()

    recording = Recording.objects.create(
        station=station,
        recorder=recorder,
        num_channels=_NUM_RECORDING_CHANNELS,
        length=length,
        sample_rate=sample_rate,
        start_time=start_time,
        end_time=end_time,
        creation_time=creation_time,
        creating_job=self._job)

    recording_channel = RecordingChannel.objects.create(
        recording=recording,
        channel_num=_RECORDING_CHANNEL_NUM,
        recorder_channel_num=_RECORDER_CHANNEL_NUM,
        mic_output=mic_output)

    return recording_channel
def _add_channel_clip_start_indices(self, channel, detector):

    """
    Find the start indices of `channel`'s clips created by `detector`
    that currently have no start index, by locating each clip's
    samples in its recording, and update the clip rows accordingly.

    Returns a (number of clips processed, number of clips found) pair.
    Updates are skipped when `self._dry_run` is set. The method also
    tracks the minimum and maximum observed clip start time changes in
    `self._min_start_time_change` and `self._max_start_time_change`.
    """

    recording = channel.recording
    recording_start_time = recording.start_time
    recording_length = recording.length
    sample_rate = recording.sample_rate

    create_count_text = text_utils.create_count_text

    with archive_lock.atomic():

        with transaction.atomic():

            # Only clips whose start index is unknown need processing.
            clips = Clip.objects.filter(
                recording_channel=channel,
                creating_processor=detector,
                start_index=None)

            num_clips = clips.count()
            num_clips_found = 0

            if num_clips != 0:

                count_text = create_count_text(num_clips, 'clip')

                self._logger.info(
                    f'Processing {count_text} for recording channel '
                    f'"{str(channel)}" and detector "{detector.name}"...')

                start_time = recording_start_time
                duration = datetime.timedelta(
                    seconds=recording_length / sample_rate)
                end_time = start_time + duration

                # self._logger.info(
                #     f'    Recording has start time {str(start_time)} '
                #     f'and end time {end_time}.')

                for clip in clips:

                    # `result` is a string (an error description) when
                    # the clip could not be found — presumably; verify
                    # against `_find_clip_in_recording`.
                    result = self._find_clip_in_recording(clip, channel)

                    if not isinstance(result, str):
                        # found clip

                        # Get result parts. Note that the clip channel
                        # can change when the clip is found, since in
                        # some cases clips were attributed to the wrong
                        # recordings when the clips were imported. In
                        # one scenario, for example, a clip that was
                        # actually toward the beginning of the second
                        # of two contiguous recordings of a night was
                        # incorrectly assigned to the end of the first
                        # recording, since according to the purported
                        # start times and sample rates of the recordings
                        # the end of the first recording overlapped
                        # the start of the second recording in time.
                        samples, found_channel, start_index = result

                        # Get clip start time.
                        start_seconds = start_index / sample_rate
                        delta = datetime.timedelta(seconds=start_seconds)
                        if found_channel == channel:
                            start_time = recording_start_time + delta
                        else:
                            start_time = \
                                found_channel.recording.start_time + delta

                        # Get change in clip start time.
                        start_time_change = \
                            (start_time - clip.start_time).total_seconds()

                        # Track the extreme start time changes seen so far.
                        if start_time_change < self._min_start_time_change:
                            self._min_start_time_change = start_time_change
                        if start_time_change > self._max_start_time_change:
                            self._max_start_time_change = start_time_change

                        # Get clip length. The Old Bird detectors
                        # sometimes append zeros to a clip that were
                        # not in the recording that the clip refers
                        # to. We ignore the appended zeros.
                        length = len(samples)
                        duration = signal_utils.get_duration(
                            length, sample_rate)

                        # Get clip end time.
                        end_time = signal_utils.get_end_time(
                            start_time, length, sample_rate)

                        clip.channel = found_channel
                        clip.start_index = start_index
                        clip.length = length
                        clip.start_time = start_time
                        clip.end_time = end_time

                        if not self._dry_run:
                            clip.save()

                        num_clips_found += 1

            if num_clips_found != num_clips:
                self._log_clips_not_found(num_clips - num_clips_found)

    return num_clips, num_clips_found
def main():

    """
    Create clips (with classification and call-center annotations) for
    every recording in the archive from BirdVox-70k clip data.

    For each recording, clip start indices are derived from call
    center indices; duplicate center indices are bumped by one until
    unique so clip start indices satisfy the archive's uniqueness
    constraint. Clips that still collide are reported and skipped.
    """

    center_index_info = AnnotationInfo.objects.get(name='Call Center Index')
    center_freq_info = AnnotationInfo.objects.get(name='Call Center Freq')
    classification_info = AnnotationInfo.objects.get(name='Classification')

    annotation_user = User.objects.get(username='******')

    for recording in Recording.objects.all():

        print('processing recording {}...'.format(str(recording)))

        # Get field values that are the same for all clips of this recording.
        station = recording.station
        recording_channel = recording.channels.get()
        mic_output = recording_channel.mic_output
        sample_rate = recording.sample_rate
        length = int(round(CLIP_DURATION * sample_rate))
        night = station.get_night(recording.start_time)
        detector = Processor.objects.get(name='BirdVox-70k')

        clip_data = get_recording_clip_data(recording)

        center_indices = set()

        for center_index, center_freq in clip_data:

            # Some call center indices in the input data are
            # duplicates, so that clip start indices computed
            # from them violate a Vesper archive database
            # uniqueness constraint. We bump duplicate indices
            # by one until they are unique to resolve the issue.
            while center_index in center_indices:
                center_index += 1
            center_indices.add(center_index)

            # Center the clip on the (possibly bumped) center index.
            start_index = center_index - length // 2
            start_offset = start_index / sample_rate
            start_time_delta = datetime.timedelta(seconds=start_offset)
            start_time = recording.start_time + start_time_delta
            end_time = signal_utils.get_end_time(
                start_time, length, sample_rate)

            creation_time = time_utils.get_utc_now()

            try:
                clip = Clip.objects.create(
                    station=station,
                    mic_output=mic_output,
                    recording_channel=recording_channel,
                    start_index=start_index,
                    length=length,
                    sample_rate=sample_rate,
                    start_time=start_time,
                    end_time=end_time,
                    date=night,
                    creation_time=creation_time,
                    creating_processor=detector)

            except IntegrityError:
                print((
                    'Duplicate clip with center index {}. '
                    'Clip will be ignored.').format(center_index),
                    file=sys.stderr)

            else:

                # Add classification annotation.
                classification = get_classification(center_freq)
                StringAnnotation.objects.create(
                    clip=clip,
                    info=classification_info,
                    value=classification,
                    creation_time=creation_time,
                    creating_user=annotation_user)

                if classification.startswith('Call.'):

                    # Add center time annotation.
                    StringAnnotation.objects.create(
                        clip=clip,
                        info=center_index_info,
                        value=str(center_index),
                        creation_time=creation_time,
                        creating_user=annotation_user)

                    # Add center frequency annotation.
                    StringAnnotation.objects.create(
                        clip=clip,
                        info=center_freq_info,
                        value=str(center_freq),
                        creation_time=creation_time,
                        creating_user=annotation_user)
def _create_clips(self, threshold):

    """
    Create clips for the current batch (`self._clips`) of detected
    clips.

    When `self._defer_clip_creation` is set, clip descriptions are
    appended to `self._deferred_clips` for later creation. Otherwise
    clip database rows (and their annotations) are created in one
    archive-lock/database transaction; if any clip in the batch fails,
    the whole batch is rolled back and ignored. Clip audio files, if
    requested, are created after (outside) the transaction. The batch
    is cleared at the end in all cases.

    `threshold` is unused here — presumably part of a common detector
    listener interface; confirm against callers.
    """

    if not _CREATE_CLIPS:
        return

    # TODO: Find out exactly what database queries are
    # executed during detection (ideally, record the sequence
    # of queries) to see if database interaction could be
    # made more efficient, for example with a cache.

    recording_channel = self._recording_channel
    detector_model = self._detector_model
    start_offset = self._file_start_index + self._interval_start_index
    creation_time = time_utils.get_utc_now()
    create_clip_files = self._create_clip_files

    if self._defer_clip_creation:

        for start_index, length, annotations in self._clips:
            start_index += start_offset
            clip = [
                recording_channel.id, start_index, length, creation_time,
                self._job.id, detector_model.id, annotations
            ]
            self._deferred_clips.append(clip)

    else:
        # database writes not deferred

        station = self._recording.station
        sample_rate = self._recording.sample_rate
        mic_output = recording_channel.mic_output

        if create_clip_files:
            clips = []

        # Create database records for current batch of clips in one
        # database transaction.

        # trans_start_time = time.time()

        try:

            with archive_lock.atomic(), transaction.atomic():

                for start_index, length, annotations in self._clips:

                    # Get clip start time as a `datetime`.
                    start_index += start_offset
                    start_delta = datetime.timedelta(
                        seconds=start_index / sample_rate)
                    start_time = \
                        self._recording.start_time + start_delta

                    end_time = signal_utils.get_end_time(
                        start_time, length, sample_rate)

                    try:

                        # It would be nice to use Django's
                        # `bulk_create` here, but unfortunately that
                        # won't automatically set clip IDs for us
                        # except (as of this writing) if we're using
                        # PostgreSQL.
                        clip = Clip.objects.create(
                            station=station,
                            mic_output=mic_output,
                            recording_channel=recording_channel,
                            start_index=start_index,
                            length=length,
                            sample_rate=sample_rate,
                            start_time=start_time,
                            end_time=end_time,
                            date=station.get_night(start_time),
                            creation_time=creation_time,
                            creating_user=None,
                            creating_job=self._job,
                            creating_processor=detector_model)

                        if create_clip_files:
                            # Save clip so we can create clip file
                            # outside of transaction.
                            clips.append(clip)

                        if annotations is not None:
                            for name, value in annotations.items():
                                annotation_info = \
                                    self._get_annotation_info(name)
                                model_utils.annotate_clip(
                                    clip, annotation_info, str(value),
                                    creation_time=creation_time,
                                    creating_user=None,
                                    creating_job=self._job,
                                    creating_processor=detector_model)

                    except Exception as e:
                        # Note that it's important not to perform any
                        # database queries here. If the database raised
                        # the exception, we have to wait until we're
                        # outside of the transaction to query the
                        # database again.
                        raise _ClipCreationError(e)

            # trans_end_time = time.time()
            # self._num_transactions += 1
            # self._total_transactions_duration += \
            #     trans_end_time - trans_start_time

        except _ClipCreationError as e:

            # The loop variables `length`, `sample_rate` and
            # `start_time` still hold the failing clip's values here,
            # so we can build its description outside the transaction.
            duration = signal_utils.get_duration(length, sample_rate)
            clip_string = Clip.get_string(
                station.name, mic_output.name, detector_model.name,
                start_time, duration)

            batch_size = len(self._clips)
            self._num_database_failures += batch_size

            if batch_size == 1:
                prefix = 'Clip'
            else:
                prefix = f'All {batch_size} clips in this batch'

            self._logger.error(
                f' Attempt to create clip {clip_string} '
                f'failed with message: {str(e.wrapped_exception)}. '
                f'{prefix} will be ignored.')

        else:
            # clip creation succeeded

            if create_clip_files:

                for clip in clips:

                    try:
                        self._clip_manager.create_audio_file(clip)

                    except Exception as e:
                        self._num_file_failures += 1
                        self._logger.error((
                            ' Attempt to create audio file '
                            'for clip {} failed with message: {} Clip '
                            'database record was still created.').format(
                                str(clip), str(e)))

    self._clips = []
def _add_channel_clip_start_indices(self, channel, detector):

    """
    Find the start indices of `channel`'s clips created by `detector`
    that currently have no start index, by locating each clip's
    samples in the recording channel, and update the clip rows with
    the found start indices and recomputed start and end times.

    Returns a (number of clips processed, number of clips found) pair.
    Database updates are skipped when `self._dry_run` is set.
    """

    # Stash some data as object attributes so we don't have to
    # repeatedly pass them to `_find_clip_in_recording_channel`
    # method or query database there.
    recording = channel.recording
    self._recording_start_time = recording.start_time
    self._recording_length = recording.length
    self._sample_rate = recording.sample_rate
    self._channel_num = channel.channel_num

    create_count_text = text_utils.create_count_text

    with archive_lock.atomic():

        with transaction.atomic():

            # Only clips whose start index is unknown need processing.
            clips = Clip.objects.filter(
                recording_channel=channel,
                creating_processor=detector,
                start_index=None)

            num_clips = clips.count()
            num_clips_found = 0

            if num_clips != 0:

                count_text = create_count_text(num_clips, 'clip')

                # Bug fix: the original message omitted the closing
                # double quote after the detector name.
                self._logger.info(
                    f'Processing {count_text} for recording channel '
                    f'"{str(channel)}" and detector "{detector.name}"...')

                for clip in clips:

                    result = self._find_clip_in_recording_channel(clip)

                    if result is not None:

                        start_index = result[1]

                        # Get clip start time from its start index.
                        start_seconds = start_index / self._sample_rate
                        delta = datetime.timedelta(seconds=start_seconds)
                        start_time = self._recording_start_time + delta

                        end_time = signal_utils.get_end_time(
                            start_time, clip.length, self._sample_rate)

                        start_time_change = \
                            (start_time - clip.start_time).total_seconds()

                        duration = (clip.length - 1) / self._sample_rate

                        self._logger.info(
                            f' {start_index} {str(clip.start_time)} '
                            f'-> {str(start_time)} {start_time_change} '
                            f'{duration} {str(end_time)}')

                        clip.start_index = start_index
                        clip.start_time = start_time
                        clip.end_time = end_time

                        if not self._dry_run:
                            clip.save()

                        num_clips_found += 1

            if num_clips_found != num_clips:
                self._log_clips_not_found(num_clips - num_clips_found)

    return num_clips, num_clips_found
def _create_clips(self, threshold):

    """
    Create clips for the current batch (`self._clips`) of detected
    clips.

    When `self._defer_clip_creation` is set, clip descriptions are
    appended to `self._deferred_clips` for later creation. Otherwise
    clip rows (and annotations) are created in one archive-lock/
    database transaction; a failure for any clip rolls back and
    discards the whole batch. Clip audio files, if requested, are
    created after the transaction. The batch is cleared at the end.

    `threshold` is unused here — presumably part of a common detector
    listener interface; confirm against callers.
    """

    if not _CREATE_CLIPS:
        return

    # TODO: Find out exactly what database queries are
    # executed during detection (ideally, record the sequence
    # of queries) to see if database interaction could be
    # made more efficient, for example with a cache.

    recording_channel = self._recording_channel
    detector_model = self._detector_model
    start_offset = self._file_start_index + self._interval_start_index
    creation_time = time_utils.get_utc_now()
    create_clip_files = self._create_clip_files

    if self._defer_clip_creation:

        for start_index, length, annotations in self._clips:
            start_index += start_offset
            clip = [
                recording_channel.id, start_index, length, creation_time,
                self._job.id, detector_model.id, annotations]
            self._deferred_clips.append(clip)

    else:
        # database writes not deferred

        station = self._recording.station
        sample_rate = self._recording.sample_rate
        mic_output = recording_channel.mic_output

        if create_clip_files:
            clips = []

        # Create database records for current batch of clips in one
        # database transaction.

        # trans_start_time = time.time()

        try:

            with archive_lock.atomic(), transaction.atomic():

                for start_index, length, annotations in self._clips:

                    try:

                        # Get clip start time as a `datetime`.
                        start_index += start_offset
                        start_delta = datetime.timedelta(
                            seconds=start_index / sample_rate)
                        start_time = \
                            self._recording.start_time + start_delta

                        end_time = signal_utils.get_end_time(
                            start_time, length, sample_rate)

                        # It would be nice to use Django's
                        # `bulk_create` here, but unfortunately that
                        # won't automatically set clip IDs for us
                        # except (as of this writing) if we're using
                        # PostgreSQL.
                        clip = Clip.objects.create(
                            station=station,
                            mic_output=mic_output,
                            recording_channel=recording_channel,
                            start_index=start_index,
                            length=length,
                            sample_rate=sample_rate,
                            start_time=start_time,
                            end_time=end_time,
                            date=station.get_night(start_time),
                            creation_time=creation_time,
                            creating_user=None,
                            creating_job=self._job,
                            creating_processor=detector_model
                        )

                        if create_clip_files:
                            # Save clip so we can create clip file
                            # outside of transaction.
                            clips.append(clip)

                        if annotations is not None:
                            for name, value in annotations.items():
                                annotation_info = \
                                    self._get_annotation_info(name)
                                model_utils.annotate_clip(
                                    clip, annotation_info, str(value),
                                    creation_time=creation_time,
                                    creating_user=None,
                                    creating_job=self._job,
                                    creating_processor=detector_model)

                    except Exception as e:
                        # NOTE(review): `start_time` is read here but
                        # may be unset if the failure happened before
                        # it was assigned on the first iteration —
                        # confirm this cannot occur in practice.
                        duration = signal_utils.get_duration(
                            length, sample_rate)
                        clip_string = Clip.get_string(
                            station.name, mic_output.name,
                            detector_model.name, start_time, duration)
                        raise _ClipCreationError(clip_string, e)

            # trans_end_time = time.time()
            # self._num_transactions += 1
            # self._total_transactions_duration += \
            #     trans_end_time - trans_start_time

        except _ClipCreationError as e:

            batch_size = len(self._clips)
            self._num_database_failures += batch_size

            if batch_size == 1:
                prefix = 'Clip'
            else:
                prefix = 'All {} clips in this batch'.format(
                    batch_size)

            # `clip_string` leaks out of the inner handler above; it
            # describes the clip whose creation failed.
            self._logger.error((
                ' Attempt to create clip {} failed with '
                'message: {} {} will be ignored.').format(
                    clip_string, str(e.wrapped_exception), prefix))

        else:
            # clip creation succeeded

            if create_clip_files:

                for clip in clips:

                    try:
                        self._clip_manager.create_audio_file(clip)

                    except Exception as e:
                        self._num_file_failures += 1
                        self._logger.error((
                            ' Attempt to create audio file '
                            'for clip {} failed with message: {} Clip '
                            'database record was still created.').format(
                                str(clip), str(e)))

    self._clips = []