def fit(self, fname: str, *_) -> "GenericSubtitleParser":
    """Read the subtitle file ``fname``, decode it and parse it into ``self.subs_``.

    The raw bytes are read through ``open_file``, decoded with
    ``errors="replace"``, stripped, parsed according to ``self.sub_format``
    and passed through ``_preprocess_subs`` before being wrapped in a
    ``GenericSubtitlesFile``.

    Args:
        fname: Name handed to ``open_file``. ``None`` is recorded as
            ``"<stdin>"`` in ``self.fit_fname`` (presumably ``open_file``
            reads standard input in that case -- confirm against its
            definition).
        *_: Extra positional arguments are ignored.

    Returns:
        ``self``. On success ``self.subs_`` and ``self.fit_fname`` are set;
        when ``self.encoding == "infer"`` ``self.detected_encoding_`` is set
        as well.

    Raises:
        Exception: The last error raised while decoding/parsing (including
            ``NotImplementedError`` for an unsupported ``self.sub_format``)
            is re-raised once every candidate encoding has failed.
    """
    source_name = "<stdin>" if fname is None else fname
    # Skip re-parsing when caching is on and this source was fitted last time.
    if self.caching and self.fit_fname == source_name:
        return self

    encodings_to_try = (self.encoding,)
    with open_file(fname, "rb") as f:
        subs = f.read()

    if self.encoding == "infer":
        detected = chardet.detect(subs)["encoding"]
        if detected is None:
            # Detection can fail (e.g. on empty input); bytes.decode(None)
            # would raise an opaque TypeError, so fall back to UTF-8. With
            # errors="replace" below the decode itself cannot fail.
            logger.warning("could not detect encoding; falling back to utf-8")
            detected = "utf-8"
        encodings_to_try = (detected,)
        self.detected_encoding_ = detected
        logger.info("detected encoding: %s", self.detected_encoding_)

    exc = None
    for encoding in encodings_to_try:
        # Parsing and preprocessing stay inside the try so that any failure
        # for this encoding is captured and the next candidate is attempted.
        try:
            decoded_subs = subs.decode(encoding, errors="replace").strip()
            if self.sub_format == "srt":
                parsed_subs = srt.parse(decoded_subs)
            elif self.sub_format in ("ass", "ssa", "sub"):
                parsed_subs = pysubs2.SSAFile.from_string(decoded_subs)
            else:
                raise NotImplementedError(
                    "unsupported format: %s" % self.sub_format
                )

            extra_generic_subtitle_file_kwargs = {}
            if isinstance(parsed_subs, pysubs2.SSAFile):
                # Carry SSA-specific metadata over to the generic container.
                extra_generic_subtitle_file_kwargs.update(
                    {
                        "styles": parsed_subs.styles,
                        # pysubs2 on Python >= 3.6 doesn't support this
                        "fonts_opaque": getattr(parsed_subs, "fonts_opaque", None),
                        "info": (
                            parsed_subs.info if not self._skip_ssa_info else None
                        ),
                    }
                )

            self.subs_ = GenericSubtitlesFile(
                _preprocess_subs(
                    parsed_subs,
                    max_subtitle_seconds=self.max_subtitle_seconds,
                    start_seconds=self.start_seconds,
                ),
                sub_format=self.sub_format,
                encoding=encoding,
                **extra_generic_subtitle_file_kwargs,
            )
            self.fit_fname = source_name
            # Only relevant if more than one candidate encoding was tried:
            # record the one that actually worked.
            if len(encodings_to_try) > 1:
                self.detected_encoding_ = encoding
                logger.info("detected encoding: %s", self.detected_encoding_)
            return self
        except Exception as e:
            exc = e
            continue
    # Every candidate failed: surface the most recent error to the caller.
    raise exc
def fit(self, subs: GenericSubtitlesFile, *_):
    """Scale every subtitle's start and end time by ``self.scale_factor``.

    A new ``GenericSubtitle`` is built for each entry of ``subs`` (keeping
    its ``inner`` payload), the results are wrapped with
    ``subs.clone_props_for_subs`` and stored in ``self.subs_``.

    Args:
        subs: Iterable of subtitles exposing ``start``, ``end`` and ``inner``.
        *_: Extra positional arguments are ignored.

    Returns:
        ``self``.
    """

    def _scaled(moment):
        # py2 doesn't support direct multiplication of timedelta w/ float,
        # so go through total seconds and rebuild the timedelta.
        return timedelta(seconds=moment.total_seconds() * self.scale_factor)

    rescaled = [
        GenericSubtitle(_scaled(entry.start), _scaled(entry.end), entry.inner)
        for entry in subs
    ]
    self.subs_ = subs.clone_props_for_subs(rescaled)
    return self
def fit(self, output_subs: GenericSubtitlesFile, *_):
    """Merge ``output_subs`` with ``self.reference_subs`` into ``self.subs_``.

    ``self.first`` selects which sequence is passed as the first argument of
    the merge (``"reference"`` puts ``self.reference_subs`` first; any other
    value puts ``output_subs`` first). The merged entries are wrapped with
    ``output_subs.clone_props_for_subs`` and stored in ``self.subs_``.

    Extra positional arguments are ignored. Returns ``self``.
    """

    def _merger_gen(a, b):
        """Yield entries of ``a`` and ``b``, pairing entries with close starts.

        Entries must expose ``start`` (compared and subtracted) and
        ``merge_with``. Whenever two entries are combined, the entry coming
        from the original first argument is the receiver of ``merge_with``.
        NOTE(review): the walk appears to assume both inputs are ordered by
        ``start`` -- confirm with callers.
        """
        ita, itb = iter(a), iter(b)
        cur_a = next(ita, None)
        cur_b = next(itb, None)
        while True:
            # Once either side is exhausted, emit the rest of the other side
            # unchanged and stop.
            if cur_a is None and cur_b is None:
                return
            elif cur_a is None:
                while cur_b is not None:
                    yield cur_b
                    cur_b = next(itb, None)
                return
            elif cur_b is None:
                while cur_a is not None:
                    yield cur_a
                    cur_a = next(ita, None)
                return
            # else: neither are None
            # Normalise so that the "a" names refer to the side whose current
            # entry starts strictly earlier; ties take the swapped branch.
            # `swapped` records that the names no longer match the arguments.
            if cur_a.start < cur_b.start:
                swapped = False
            else:
                swapped = True
                cur_a, cur_b = cur_b, cur_a
                ita, itb = itb, ita
            prev_a = cur_a
            # Advance the "a" side while it starts before cur_b. An entry is
            # emitted unmerged when its successor also starts before cur_b or
            # when the "a" side runs out.
            while prev_a is not None and cur_a.start < cur_b.start:
                cur_a = next(ita, None)
                if cur_a is None or cur_a.start < cur_b.start:
                    yield prev_a
                    prev_a = cur_a
            # The "a" side was exhausted inside the loop: flush the "b" side.
            if prev_a is None:
                while cur_b is not None:
                    yield cur_b
                    cur_b = next(itb, None)
                return
            # Pair cur_b with whichever neighbour on the "a" side has the
            # closer start: prev_a (the earlier one) or cur_a (the later one).
            if cur_b.start - prev_a.start < cur_a.start - cur_b.start:
                if swapped:
                    yield cur_b.merge_with(prev_a)
                    # Undo the swap, then advance the original first side
                    # (the entry that was just merged).
                    ita, itb = itb, ita
                    cur_a, cur_b = cur_b, cur_a
                    cur_a = next(ita, None)
                else:
                    yield prev_a.merge_with(cur_b)
                    cur_b = next(itb, None)
            else:
                if swapped:
                    yield cur_b.merge_with(cur_a)
                    # Only the iterators need swapping back: both current
                    # entries are replaced right below.
                    ita, itb = itb, ita
                else:
                    yield cur_a.merge_with(cur_b)
                cur_a = next(ita, None)
                cur_b = next(itb, None)

    merged_subs = []
    # Decide which sequence plays the role of the generator's first argument.
    if self.first == "reference":
        first, second = self.reference_subs, output_subs
    else:
        first, second = output_subs, self.reference_subs
    for merged in _merger_gen(first, second):
        merged_subs.append(merged)
    self.subs_ = output_subs.clone_props_for_subs(merged_subs)
    return self
def fit(self, subs: GenericSubtitlesFile, *_):
    """Store ``subs`` offset by ``self.td_seconds`` in ``self.subs_``.

    Args:
        subs: Object whose ``offset`` method is called with
            ``self.td_seconds``.
        *_: Extra positional arguments are ignored.

    Returns:
        ``self``.
    """
    shifted = subs.offset(self.td_seconds)
    self.subs_ = shifted
    return self