import gc


def synchronize(video_file, subtitle_file, output_file, verbose=False,
                parallelism=3, fixed_skew=None, model_file=None,
                return_parameters=False, **kwargs):
    """
    Automatically synchronize subtitles with audio in a video file.
    Uses ffmpeg to extract the audio from the video file, so the command
    line tool "ffmpeg" must be available. Uses temporary files, which are
    deleted automatically.

    Args:
        video_file (string): Input video file name
        subtitle_file (string): Input SRT subtitle file name
        output_file (string): Output (synchronized) SRT subtitle file name
        verbose (boolean): If True, print progress information to stdout
        return_parameters (boolean): If True, return the synchronization
            parameters instead of just the success flag
        other arguments: Search parameters, see ``autosubsync --help``

    Returns:
        If return_parameters is False (default), returns True on success
        (quality of fit test passed) and False on failure. If
        return_parameters is True, returns a tuple of four values:

            success (boolean): success flag as above
            quality (float): metric used to determine the value of "success"
            skew (float): best fit skew/speed (unitless)
            shift (float): best fit shift in seconds
    """
    # these are here to enable running as python3 autosubsync/main.py
    from autosubsync import features
    from autosubsync import find_transform
    from autosubsync import model
    from autosubsync import preprocessing
    from autosubsync import quality_of_fit

    # argument parsing
    if model_file is None:
        from pkg_resources import resource_filename
        model_file = resource_filename(__name__, '../trained-model.bin')

    fixed_skew = parse_skew(fixed_skew)

    # load model
    trained_model = model.load(model_file)

    if verbose:
        print('Extracting audio using ffmpeg and reading subtitles...')
    sound_data, subvec = preprocessing.import_target_files(video_file, subtitle_file)

    if verbose:
        print(('computing features for %d audio samples ' +
               'using %d parallel process(es)') % (len(subvec), parallelism))
    features_x, shifted_y = features.compute(sound_data, subvec, parallelism=parallelism)

    if verbose:
        print('extracted features of size %s, performing speech detection' %
              str(features_x.shape))
    y_scores = model.predict(trained_model, features_x)

    # save some memory before parallelization fork so we look less bad
    del features_x, sound_data, subvec
    gc.collect()

    if verbose:
        print('computing best fit with %d frames' % len(y_scores))
    skew, shift, quality = find_transform.find_transform_parameters(
        shifted_y, y_scores,
        parallelism=parallelism, fixed_skew=fixed_skew, bias=trained_model[1],
        verbose=verbose, **kwargs)

    success = quality > quality_of_fit.threshold
    if verbose:
        print('quality of fit: %g, threshold %g' % (quality, quality_of_fit.threshold))
        print('Fit complete. Performing resync, writing to ' + output_file)

    transform_func = find_transform.parameters_to_transform(skew, shift)
    preprocessing.transform_srt(subtitle_file, output_file, transform_func)

    if verbose and success:
        print('success!')

    if return_parameters:
        return success, quality, skew, shift
    else:
        return success
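# Hedged usage sketch (an illustration, not part of the original module):
# this assumes synchronize() is exported at the package level, as the
# autosubsync imports above suggest; the file paths are hypothetical.
#
#   import autosubsync
#   ok = autosubsync.synchronize('movie.mp4', 'movie.srt', 'movie_synced.srt',
#                                verbose=True)
#   if not ok:
#       print('quality-of-fit check failed; synced subtitles may be inaccurate')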
print('Training...', train_x.shape)
trained_model = model.train(train_x, train_meta.label, train_meta, verbose=True)

# save some memory
del train_x
del train_meta

# test serialization: round-trip the model through a temporary file
with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_file = os.path.join(tmp_dir, 'model.bin')
    print('testing serialization in temp file', tmp_file)
    model.save(trained_model, tmp_file)
    trained_model = model.load(tmp_file)

print('Validating...')
predicted_score = model.predict(trained_model, test_x, test_meta.file_number)

# collect predictions next to the ground truth and binarize both by rounding
result_meta = test_meta.assign(predicted_score=predicted_score)
result_meta = result_meta.assign(predicted_label=np.round(predicted_score))
result_meta = result_meta.assign(label=np.round(result_meta.label))
result_meta = result_meta.assign(correct=result_meta.predicted_label == result_meta.label)

# bias term stored as the second element of the trained model
bias = trained_model[1]

r = validate_speech_detection(result_meta)
sync_r = test_correct_sync(result_meta, bias)
sync_results.append(
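# Hedged aside (not part of the original script): result_meta behaves as a
# pandas DataFrame (it supports .assign above), so overall and per-file
# speech-detection accuracy could be summarized along these lines:
#
#   overall_accuracy = result_meta.correct.mean()
#   per_file_accuracy = result_meta.groupby('file_number')['correct'].mean()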