Exemplo n.º 1
0
    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``self.text`` is rewritten by
        ``LocationReplace.location_replace_text`` and then passed through
        ``self.unescape_text()``.

        :param original: the text to look for.
        :param replacement: the text substituted for ``original``.
        :param locations: iterable of locations handed to
            ``location_replace_text``; it is sorted first, but the caller's
            object is never modified.
        """
        # sorted() works on a copy; the previous in-place list.sort()
        # silently reordered the list owned by the caller.
        ordered_locations = sorted(locations)
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, ordered_locations)
        self.unescape_text()
Exemplo n.º 2
0
    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``self.text`` is rewritten by
        ``LocationReplace.location_replace_text`` and then passed through
        ``self.unescape_text()``.

        :param original: the text to look for.
        :param replacement: the text substituted for ``original``.
        :param locations: iterable of locations handed to
            ``location_replace_text``; it is sorted first, but the caller's
            object is never modified.
        """
        # sorted() works on a copy; the previous in-place list.sort()
        # silently reordered the list owned by the caller.
        ordered_locations = sorted(locations)
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, ordered_locations)
        self.unescape_text()
    def test_location_replace_text(self):
        """Locations [0, 2] replace the first and third match only."""
        # (text, original, replacement, expected result)
        cases = (
            ('Bunch of as as as', 'as', '<sub>as</sub>',
             'Bunch of <sub>as</sub> as <sub>as</sub>'),
            ('Bunch of as as as', 'as', '<sub>b</sub>',
             'Bunch of <sub>b</sub> as <sub>b</sub>'),
            ('Bunch of a_{s} a_{s} a_{s}', 'a_{s}', 'a<sub>s</sub>',
             'Bunch of a<sub>s</sub> a_{s} a<sub>s</sub>'),
        )
        for text, original, replacement, expected in cases:
            # A fresh replacer and a fresh location list for every case.
            replaced = LocationReplace().location_replace_text(
                text, original, replacement, [0, 2])
            self.assertEqual(expected, replaced)
Exemplo n.º 4
0
    def inline_replacements(self, text_index, original_text):
        """Yield a ``Replacement`` for every inline layer pair of the text
        (e.g. links, highlighting, etc.).

        ``self.apply_layer(original_text, text_index)`` supplies
        ``(original, replacement, offset)`` triples. Each offset is turned
        into a one-element location list holding its index within the list
        returned by ``LocationReplace.find_all_offsets``.
        """
        for source, substitute, start in self.apply_layer(original_text,
                                                          text_index):
            # Convert from offset-based to a search and replace layer.
            known_offsets = LocationReplace.find_all_offsets(
                source, original_text)
            yield Replacement(source, substitute,
                              [known_offsets.index(start)])
Exemplo n.º 5
0
    def get_layer_pairs(self, text_index, original_text):
        """Collect the replacements of every layer for one piece of text.

        Each layer in ``self.layers`` contributes
        ``(original, replacement, offset)`` triples through its
        ``apply_layer(original_text, text_index)``. The result is a list of
        ``(original, replacement, locations)`` tuples, where ``locations``
        is a one-element list.
        """
        offset_based = []
        for layer in self.layers.values():
            offset_based.extend(layer.apply_layer(original_text, text_index))

        # Convert from offset-based to a search and replace layer: each
        # offset becomes its index within the list find_all_offsets returns.
        return [
            (source, substitute,
             [LocationReplace.find_all_offsets(
                 source, original_text).index(start)])
            for source, substitute, start in offset_based
        ]
Exemplo n.º 6
0
    def get_layer_pairs(self, text_index, original_text):
        """Gather ``(original, replacement, locations)`` tuples from all
        layers in ``self.layers``.

        Every layer's ``apply_layer(original_text, text_index)`` yields
        ``(original, replacement, offset)`` triples; ``locations`` is a
        one-element list derived from the offset.
        """
        gathered = []
        for current_layer in self.layers.values():
            gathered.extend(
                current_layer.apply_layer(original_text, text_index))

        # Convert from offset-based to a search and replace layer.
        converted = []
        for source, substitute, start in gathered:
            known_offsets = LocationReplace.find_all_offsets(
                source, original_text)
            # The location is the offset's index among the found offsets.
            converted.append(
                (source, substitute, [known_offsets.index(start)]))
        return converted
Exemplo n.º 7
0
    def get_layer_pairs(self, text_index, original_text):
        """Collect the replacements of every layer for one piece of text.

        Layers whose ``apply_layer(original_text, text_index)`` returns a
        falsy value are skipped. Each remaining
        ``(original, replacement, offset)`` triple becomes an
        ``(original, replacement, locations)`` tuple with a one-element
        ``locations`` list. A triple whose offset cannot be looked up is
        logged and left out of the result.
        """
        offset_based = []
        for layer in self.layers.values():
            produced = layer.apply_layer(original_text, text_index)
            if not produced:
                continue
            offset_based += produced

        # Convert from offset-based to a search and replace layer.
        elements = []
        for source, substitute, start in offset_based:
            candidates = LocationReplace.find_all_offsets(source,
                                                          original_text)
            try:
                position = candidates.index(start)
            except Exception as ex:
                # Best effort: report the failed lookup and move on.
                logging.info('{0!s}'.format(ex))
                logging.info('Problem interpolating offsets: {0}, {1}'.format(
                    candidates, start))
                continue
            elements.append((source, substitute, [position]))
        return elements
Exemplo n.º 8
0
    def get_layer_pairs(self, text_index, original_text):
        """Gather ``(original, replacement, locations)`` tuples from all
        layers in ``self.layers``.

        A layer contributes only when its
        ``apply_layer(original_text, text_index)`` result is truthy. Each
        offset is replaced by a one-element ``locations`` list; entries
        whose offset lookup fails are logged and dropped.
        """
        gathered = []
        for current_layer in self.layers.values():
            result = current_layer.apply_layer(original_text, text_index)
            if result:
                gathered += result

        # Convert from offset-based to a search and replace layer.
        converted = []
        for source, substitute, start in gathered:
            known_offsets = LocationReplace.find_all_offsets(
                source, original_text)
            try:
                where = known_offsets.index(start)
            except Exception as ex:
                # Best effort: log the problem and skip this entry.
                logging.info('{0!s}'.format(ex))
                logging.info('Problem interpolating offsets: {0}, {1}'.format(
                    known_offsets, start))
            else:
                converted.append((source, substitute, [where]))
        return converted
    def test_location_replace_text(self):
        """With locations [0, 2] the middle match is left untouched."""
        scenarios = [
            # (text, original, replacement, expected)
            ('Bunch of as as as', 'as', '<sub>as</sub>',
             'Bunch of <sub>as</sub> as <sub>as</sub>'),
            ('Bunch of as as as', 'as', '<sub>b</sub>',
             'Bunch of <sub>b</sub> as <sub>b</sub>'),
            ('Bunch of a_{s} a_{s} a_{s}', 'a_{s}', 'a<sub>s</sub>',
             'Bunch of a<sub>s</sub> a_{s} a<sub>s</sub>'),
        ]
        for text, original, replacement, expected in scenarios:
            # Each scenario gets its own replacer instance.
            replacer = LocationReplace()
            outcome = replacer.location_replace_text(
                text, original, replacement, [0, 2])
            self.assertEqual(expected, outcome)
 def location_replace(self, xml_node, original, replacement, locations):
     """Delegate to ``location_replace`` of a fresh ``LocationReplace``.

     The delegate's return value is discarded; this method returns None.
     """
     replacer = LocationReplace()
     replacer.location_replace(xml_node, original, replacement, locations)
Exemplo n.º 11
0
class LayersApplier(object):
    """Apply queued layer replacements to a piece of text.

    Most layers replace content. We try to do this intelligently here,
    so that layers don't step over each other: elements are queued keyed on
    the negated length of their original text.
    """

    def __init__(self):
        # Pending (-len(original), (original, replacement, locations)) items.
        self.queue = PriorityQueue()
        # Text being rewritten; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element of ``elements_list`` via enqueue()."""
        for le in elements_list:
            self.enqueue(le)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` element.

        The queue key is ``-len(original)``; assuming the standard-library
        PriorityQueue (smallest entry first), longer originals are handed
        out before shorter ones.
        """
        original, replacement, locations = layer_element
        self.queue.put((-len(original), (original, replacement, locations)))

    def replace(self, xml_node, original, replacement):
        """Replace ``original`` in the text and tail of a node, recursively.

        Helper method for replace_all(), this actually does the replace.
        This deals with XML nodes, not nodes in the tree. Only ``text`` and
        ``tail`` are touched. Returns ``xml_node``.
        """
        if xml_node.text:
            xml_node.text = xml_node.text.replace(original, replacement)

        # Iterate the element itself; getchildren() is deprecated.
        for child in xml_node:
            self.replace(child, original, replacement)

        if xml_node.tail:
            xml_node.tail = xml_node.tail.replace(original, replacement)

        return xml_node

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace().location_replace``; returns None."""
        LocationReplace().location_replace(xml_node, original, replacement,
                                           locations)

    def unescape_text(self):
        """Decode the HTML entities in ``self.text`` in place.

        Because of the way we do replace_all(), we need to unescape HTML
        entities.
        """
        try:
            # Python 3.4+ offers the decoder as a function; the parser
            # method used before was removed in Python 3.9.
            from html import unescape
        except ImportError:
            # Older interpreters only provide HTMLParser.unescape.
            unescape = HTMLParser().unescape
        self.text = unescape(self.text)

    def replace_all(self, original, replacement):
        """Replace all occurrences of ``original`` with ``replacement``.

        This is HTML aware: ``self.text`` is parsed as a fragment inside a
        temporary ``<div>``, the replacement is applied to node text and
        tails by replace(), and the result is serialized again.
        """
        htmlized = html.fragment_fromstring(self.text, create_parent='div')
        htmlized = self.replace(htmlized, original, replacement)

        self.text = html.tostring(htmlized)
        # Strip the temporary wrapper: the first "<div>" and everything
        # from the last "</div>" onward.
        self.text = self.text.replace("<div>", "", 1)
        self.text = self.text[:self.text.rfind("</div>")]
        self.unescape_text()

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``locations`` is sorted before being handed to
        ``LocationReplace.location_replace_text``; the caller's object is
        left unmodified. The result is unescaped afterwards.
        """
        # sorted() copies; the previous list.sort() reordered the caller's
        # list in place.
        ordered_locations = sorted(locations)
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, ordered_locations)
        self.unescape_text()

    def apply_layers(self, original_text):
        """Apply every queued element to ``original_text``; return the result.

        Elements with falsy ``locations`` are replaced everywhere via
        replace_all(); the others only at their locations via replace_at().
        The queue is empty afterwards.
        """
        self.text = original_text

        while not self.queue.empty():
            # The key only orders the queue; it is not needed here.
            _, (original, replacement, locations) = self.queue.get()

            if not locations:
                self.replace_all(original, replacement)
            else:
                self.replace_at(original, replacement, locations)

        return self.text
Exemplo n.º 12
0
 def unescape_text(self):
     """Decode the HTML entities in ``self.text`` in place.

     Because of the way we do replace_all(), we need to unescape HTML
     entities.
     """
     try:
         # Python 3.4+ offers the decoder as a function; the parser
         # method used before was removed in Python 3.9.
         from html import unescape
     except ImportError:
         # Older interpreters only provide HTMLParser.unescape.
         unescape = HTMLParser().unescape
     self.text = unescape(self.text)
Exemplo n.º 13
0
class LayersApplier(object):
    """Apply queued layer replacements to a piece of text.

    Most layers replace content. We try to do this intelligently here,
    so that layers don't step over each other: elements are queued keyed on
    the negated length of their original text.
    """

    def __init__(self):
        # Pending (-len(original), (original, replacement, locations)) items.
        self.queue = PriorityQueue()
        # Text being rewritten; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element of ``elements_list`` via enqueue()."""
        for le in elements_list:
            self.enqueue(le)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` element.

        The queue key is ``-len(original)``; assuming the standard-library
        PriorityQueue (smallest entry first), longer originals are handed
        out before shorter ones.
        """
        original, replacement, locations = layer_element
        self.queue.put((-len(original), (original, replacement, locations)))

    def replace(self, xml_node, original, replacement):
        """Replace ``original`` in the text and tail of a node, recursively.

        Helper method for replace_all(), this actually does the replace.
        This deals with XML nodes, not nodes in the tree. Only ``text`` and
        ``tail`` are touched. Returns ``xml_node``.
        """
        if xml_node.text:
            xml_node.text = xml_node.text.replace(original, replacement)

        # Iterate the element itself; getchildren() is deprecated.
        for child in xml_node:
            self.replace(child, original, replacement)

        if xml_node.tail:
            xml_node.tail = xml_node.tail.replace(original, replacement)

        return xml_node

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace().location_replace``; returns None."""
        LocationReplace().location_replace(xml_node, original, replacement,
                                           locations)

    def unescape_text(self):
        """Decode the HTML entities in ``self.text`` in place.

        Because of the way we do replace_all(), we need to unescape HTML
        entities.
        """
        try:
            # Python 3.4+ offers the decoder as a function; the parser
            # method used before was removed in Python 3.9.
            from html import unescape
        except ImportError:
            # Older interpreters only provide HTMLParser.unescape.
            unescape = HTMLParser().unescape
        self.text = unescape(self.text)

    def replace_all(self, original, replacement):
        """Replace all occurrences of ``original`` with ``replacement``.

        This is HTML aware: ``self.text`` is parsed as a fragment inside a
        temporary ``<div>``, the replacement is applied to node text and
        tails by replace(), and the result is serialized again.
        """
        htmlized = html.fragment_fromstring(self.text, create_parent='div')
        htmlized = self.replace(htmlized, original, replacement)

        self.text = html.tostring(htmlized)
        # Strip the temporary wrapper: the first "<div>" and everything
        # from the last "</div>" onward.
        self.text = self.text.replace("<div>", "", 1)
        self.text = self.text[:self.text.rfind("</div>")]
        self.unescape_text()

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of ``original`` at the given locations.

        ``locations`` is sorted before being handed to
        ``LocationReplace.location_replace_text``; the caller's object is
        left unmodified. The result is unescaped afterwards.
        """
        # sorted() copies; the previous list.sort() reordered the caller's
        # list in place.
        ordered_locations = sorted(locations)
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, ordered_locations)
        self.unescape_text()

    def apply_layers(self, original_text):
        """Apply every queued element to ``original_text``; return the result.

        Elements with falsy ``locations`` are replaced everywhere via
        replace_all(); the others only at their locations via replace_at().
        The queue is empty afterwards.
        """
        self.text = original_text

        while not self.queue.empty():
            # The key only orders the queue; it is not needed here.
            _, (original, replacement, locations) = self.queue.get()

            if not locations:
                self.replace_all(original, replacement)
            else:
                self.replace_at(original, replacement, locations)

        return self.text
Exemplo n.º 14
0
 def unescape_text(self):
     """Decode the HTML entities in ``self.text`` in place.

     Because of the way we do replace_all(), we need to unescape HTML
     entities.
     """
     try:
         # Python 3.4+ offers the decoder as a function; the parser
         # method used before was removed in Python 3.9.
         from html import unescape
     except ImportError:
         # Older interpreters only provide HTMLParser.unescape.
         unescape = HTMLParser().unescape
     self.text = unescape(self.text)
 def test_update_offsets_html(self):
     """update_offsets() records only the match outside the anchor tag."""
     replacer = LocationReplace()
     # (text searched for "a", offsets expected afterwards); the same
     # replacer instance is reused for both texts.
     cases = (
         ("This is a test. It is only a test", {0: (8, 9), 1: (27, 28)}),
         ("This is a test. <a href='something'>link</a>", {0: (8, 9)}),
     )
     for text, expected in cases:
         replacer.update_offsets("a", text)
         self.assertEqual(replacer.offsets, expected)
 def test_update_offsets_html(self):
     """Offsets for "a" in plain text, then in text holding an anchor."""
     finder = LocationReplace()

     plain_text = "This is a test. It is only a test"
     finder.update_offsets("a", plain_text)
     self.assertEqual(finder.offsets, {0: (8, 9), 1: (27, 28)})

     # Same instance again: only the match at (8, 9) is expected now.
     linked_text = "This is a test. <a href='something'>link</a>"
     finder.update_offsets("a", linked_text)
     self.assertEqual(finder.offsets, {0: (8, 9)})