def adapt_pullenti(markup):
    """Normalise the spans of a Pullenti markup and return a new ``Markup``.

    The spans go through four stages, each fully materialised into a list
    before the next one starts:

    1. ``split_overlapping_spans`` over ``markup.spans``;
    2. ``strip_spans`` with quotes, brackets, dashes and spaces as the
       character set (presumably trimming them from span edges -- confirm
       against the helper);
    3. ``filter_empty_spans``;
    4. ``adapt_spans`` with ``PULLENTI_TYPES``.

    The text of the returned markup is ``markup.text`` unchanged.
    """
    text = markup.text
    trimmed_chars = QUOTES + BRACKETS + DASHES + SPACES

    separated = list(split_overlapping_spans(markup.spans))
    stripped = list(strip_spans(separated, text, trimmed_chars))
    non_empty = list(filter_empty_spans(stripped))
    adapted = list(adapt_spans(non_empty, text, PULLENTI_TYPES))
    return Markup(text, adapted)
def adapt_mitie(markup):
    """Normalise the spans of a MITIE markup and return a new ``Markup``.

    Spans are passed through ``strip_spans`` with quotes, the dot and
    spaces as the character set, then through ``adapt_spans`` with
    ``MITIE_TYPES``. Both stages are materialised into lists. The text of
    the returned markup is ``markup.text`` unchanged.
    """
    # Examples of spans that carry trailing punctuation (the dashes mark
    # the extent of the span as annotated):
    #
    #   Chuvash Republic".
    #   ------------------
    #
    #   the year of Charles Darwin»
    #   ---------------------------
    text = markup.text
    stripped = list(strip_spans(markup.spans, text, QUOTES + DOT + SPACES))
    adapted = list(adapt_spans(stripped, text, MITIE_TYPES))
    return Markup(text, adapted)
def adapt_gareev(markup):
    """Normalise the spans of a Gareev markup and return a new ``Markup``.

    The stages are chained without intermediate lists; the result is
    materialised once, when the new ``Markup`` is built:

    1. ``strip_spans`` with the dot and spaces as the character set;
    2. ``strip_spans_bounds`` with quotes and spaces (presumably handling
       quote characters at both ends of a span -- confirm against the
       helper);
    3. ``adapt_spans`` with ``GAREEV_TYPES``.

    The text of the returned markup is ``markup.text`` unchanged.
    """
    # Spans in this source contain extra spaces and dots, e.g. (the dashes
    # mark the extent of the span as annotated):
    #
    #   News Corp .
    #   -----------
    #
    #   « Rusal »
    #   ---------
    text = markup.text
    pipeline = strip_spans(markup.spans, text, DOT + SPACES)
    pipeline = strip_spans_bounds(pipeline, text, QUOTES + SPACES)
    pipeline = adapt_spans(pipeline, text, GAREEV_TYPES)
    return Markup(text, list(pipeline))