def transcribe(item): data_dir = settings.SPINDLE_SPHINX_TEST_DATA_DIR infile = open(os.path.join(data_dir, item.id)) clips = list(reader.read_clips(infile)) save_transcription(item, clips=clips, engine=current_task.name, logger = logger)
def transcribe(item): data_dir = settings.SPINDLE_SPHINX_TEST_DATA_DIR infile = open(os.path.join(data_dir, item.id)) clips = list(reader.read_clips(infile)) save_transcription(item, clips=clips, engine=current_task.name, logger=logger)
def handle(self, *args, **options): index_filename = args[0] data_dir = args[1] verbose = False url_not_found = [] file_not_found = [] items = Item.objects.bulk_fetch() index = open(index_filename) total_count = 0 for line in index: total_count += 1 index.seek(0, 0) for idx, line in enumerate(index): url, filename = line.split(" ") filename = filename.strip() self.stderr.write(u'\n{:4.1f}% {}\n'.format( 100 * float(idx) / total_count, url)) try: item = items.audio[url] except KeyError: try: item = items.video[url] except KeyError: self.stderr.write( u"No item found -- not imported\n\n".format(url)) url_not_found.append(url) continue self.stderr.write(u'{} {}\n'.format(item.id, item.name)) existing_tracks = item.track_set.filter(name__exact=TRACK_NAME) if sum(track.clip_count for track in existing_tracks.all()): self.stderr.write("Already imported\n\n") continue track = Track(item=item, name=TRACK_NAME) track.save() speaker = Speaker(track=track, name="Speaker 1") speaker.save() path = os.path.join(data_dir, filename) try: clips = [] with open(path) as sphinx_output: for clip in read_clips(sphinx_output, speaker=speaker): if verbose: self.stderr.write(u"{:6.1f} {:6.1f} {}\n".format( clip.intime, clip.outtime, clip.caption_text)) clip.track = track clips.append(clip) Clip.objects.bulk_create(clips) item.archive() self.stderr.write('\n\n') except Exception as err: self.stderr.write(u"Error in reading {}: {}".format(path, err)) file_not_found.append(path) continue if url_not_found: self.stderr.write("{} URLs not found in database:\n".format( len(url_not_found))) for url in url_not_found: self.stderr.write(u'\t{}\n'.format(url)) if file_not_found: self.stderr.write("{} files not found:\n".format( len(file_not_found))) for path in file_not_found: self.stderr.write(u'\t{}\n'.format(path))
def handle(self, *args, **options): index_filename = args[0] data_dir = args[1] verbose = False url_not_found = [] file_not_found = [] items = Item.objects.bulk_fetch() index = open(index_filename) total_count = 0 for line in index: total_count += 1 index.seek(0,0) for idx, line in enumerate(index): url, filename = line.split(" ") filename = filename.strip() self.stderr.write(u'\n{:4.1f}% {}\n'.format( 100 * float(idx) / total_count, url)) try: item = items.audio[url] except KeyError: try: item = items.video[url] except KeyError: self.stderr.write(u"No item found -- not imported\n\n".format(url)) url_not_found.append(url) continue self.stderr.write(u'{} {}\n'.format(item.id, item.name)) existing_tracks = item.track_set.filter(name__exact = TRACK_NAME) if sum(track.clip_count for track in existing_tracks.all()): self.stderr.write("Already imported\n\n") continue track = Track(item=item, name=TRACK_NAME) track.save() speaker = Speaker(track=track, name="Speaker 1") speaker.save() path = os.path.join(data_dir, filename) try: clips = [] with open(path) as sphinx_output: for clip in read_clips(sphinx_output, speaker = speaker): if verbose: self.stderr.write(u"{:6.1f} {:6.1f} {}\n".format( clip.intime, clip.outtime, clip.caption_text)) clip.track = track clips.append(clip) Clip.objects.bulk_create(clips) item.archive() self.stderr.write('\n\n') except Exception as err : self.stderr.write(u"Error in reading {}: {}".format(path, err)) file_not_found.append(path) continue if url_not_found: self.stderr.write("{} URLs not found in database:\n".format( len(url_not_found))) for url in url_not_found: self.stderr.write(u'\t{}\n'.format(url)) if file_not_found: self.stderr.write("{} files not found:\n".format( len(file_not_found))) for path in file_not_found: self.stderr.write(u'\t{}\n'.format(path))