Пример #1
0
    def _get_backfill_events(self, txn, room_id, event_list, limit):
        logger.debug(
            "_get_backfill_events: %s, %s, %s",
            room_id, repr(event_list), limit
        )

        event_results = set()

        # We want to make sure that we do a breadth-first, "depth" ordered
        # search.

        query = (
            "SELECT depth, prev_event_id FROM event_edges"
            " INNER JOIN events"
            " ON prev_event_id = events.event_id"
            " WHERE event_edges.event_id = ?"
            " AND event_edges.is_state = ?"
            " LIMIT ?"
        )

        queue = PriorityQueue()

        for event_id in event_list:
            depth = self._simple_select_one_onecol_txn(
                txn,
                table="events",
                keyvalues={
                    "event_id": event_id,
                    "room_id": room_id,
                },
                retcol="depth",
                allow_none=True,
            )

            if depth:
                queue.put((-depth, event_id))

        while not queue.empty() and len(event_results) < limit:
            try:
                _, event_id = queue.get_nowait()
            except Empty:
                break

            if event_id in event_results:
                continue

            event_results.add(event_id)

            txn.execute(
                query,
                (event_id, False, limit - len(event_results))
            )

            for row in txn:
                if row[1] not in event_results:
                    queue.put((-row[0], row[1]))

        return event_results
Пример #2
0
    def _get_backfill_events(self, txn, room_id, event_list, limit):
        logger.debug(
            "_get_backfill_events: %s, %s, %s",
            room_id, repr(event_list), limit
        )

        event_results = set()

        # We want to make sure that we do a breadth-first, "depth" ordered
        # search.

        query = (
            "SELECT depth, prev_event_id FROM event_edges"
            " INNER JOIN events"
            " ON prev_event_id = events.event_id"
            " WHERE event_edges.event_id = ?"
            " AND event_edges.is_state = ?"
            " LIMIT ?"
        )

        queue = PriorityQueue()

        for event_id in event_list:
            depth = self._simple_select_one_onecol_txn(
                txn,
                table="events",
                keyvalues={
                    "event_id": event_id,
                    "room_id": room_id,
                },
                retcol="depth",
                allow_none=True,
            )

            if depth:
                queue.put((-depth, event_id))

        while not queue.empty() and len(event_results) < limit:
            try:
                _, event_id = queue.get_nowait()
            except Empty:
                break

            if event_id in event_results:
                continue

            event_results.add(event_id)

            txn.execute(
                query,
                (event_id, False, limit - len(event_results))
            )

            for row in txn:
                if row[1] not in event_results:
                    queue.put((-row[0], row[1]))

        return event_results
class LayersApplier(object):
    """Apply queued layer replacements to a piece of text.

    Most layers replace content. We try to do this intelligently here,
    so that layers don't step over each other: replacements are applied
    in order of decreasing length of the text they replace.
    """
    # Matches a single HTML tag, e.g. ``<a href="...">`` or ``</a>``.
    HTML_TAG_REGEX = re.compile(r'<[^>]*?>')

    def __init__(self):
        # Holds (-len(original), (original, replacement, locations)).
        self.queue = PriorityQueue()
        # Text currently being transformed; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element in ``elements_list``."""
        for layer_element in elements_list:
            self.enqueue(layer_element)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` layer element.

        The length of ``original`` is negated so that the queue, which
        returns the smallest entry first, hands back the longest original
        first.
        """
        original, replacement, locations = layer_element
        priority = len(original)
        item = (original, replacement, locations)
        self.queue.put((-priority, item))

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace.location_replace`` for ``xml_node``."""
        LocationReplace().location_replace(xml_node, original, replacement,
                                           locations)

    def replace_all(self, original, replacement):
        """Replace all occurrences of original with replacement.

        This is HTML aware: only the text outside of HTML tags is searched,
        so tag names and attribute values are left untouched. The result is
        stored back into ``self.text``.
        """
        text_chunks = []
        index = 0
        for match in self.HTML_TAG_REGEX.finditer(self.text):
            between_tags = self.text[index:match.start()]
            text_chunks.append(between_tags.replace(original, replacement))
            text_chunks.append(match.group(0))  # the tag itself, unchanged
            index = match.end()
        # Text after the last tag (the whole string when there are no tags)
        # must be processed as well, otherwise trailing occurrences survive.
        text_chunks.append(self.text[index:].replace(original, replacement))
        self.text = "".join(text_chunks)

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of original at the given locations.

        ``locations`` is passed on in ascending order; a sorted copy is used
        so the caller's list is not reordered as a side effect.
        """
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, sorted(locations))

    def apply_layers(self, original_text):
        """Apply every queued layer element to ``original_text``.

        Elements with no locations (``None`` or empty) are replaced
        everywhere; the others only at their listed locations. The queue is
        empty afterwards.

        Returns:
            The transformed text.
        """
        self.text = original_text

        while not self.queue.empty():
            _, layer_element = self.queue.get()
            original, replacement, locations = layer_element

            if not locations:
                self.replace_all(original, replacement)
            else:
                self.replace_at(original, replacement, locations)

        return self.text
Пример #4
0
 def _create_files_list(self):
     priorityQueue = PriorityQueue()
     for txt_file in self._txt_files:
         wav_file = os.path.splitext(txt_file)[0] + ".wav"
         wav_file_size = os.path.getsize(wav_file)
         priorityQueue.put((wav_file_size, (txt_file, wav_file)))
     files_list = []
     while not priorityQueue.empty():
         priority, (txt_file, wav_file) = priorityQueue.get()
         files_list.append((txt_file, wav_file))
     return files_list
Пример #5
0
class LayersApplier(object):
    """Apply queued layer replacements to a piece of text.

    Most layers replace content. We try to do this intelligently here,
    so that layers don't step over each other: replacements are applied
    in order of decreasing length of the text they replace.
    """

    # Matches a single HTML tag, e.g. ``<a href="...">`` or ``</a>``.
    HTML_TAG_REGEX = re.compile(r"<[^>]*?>")

    def __init__(self):
        # Holds (-len(original), (original, replacement, locations)).
        self.queue = PriorityQueue()
        # Text currently being transformed; set by apply_layers().
        self.text = None

    def enqueue_from_list(self, elements_list):
        """Queue every layer element in ``elements_list``."""
        for layer_element in elements_list:
            self.enqueue(layer_element)

    def enqueue(self, layer_element):
        """Queue one ``(original, replacement, locations)`` layer element.

        The length of ``original`` is negated so that the queue, which
        returns the smallest entry first, hands back the longest original
        first.
        """
        original, replacement, locations = layer_element
        priority = len(original)
        item = (original, replacement, locations)
        self.queue.put((-priority, item))

    def location_replace(self, xml_node, original, replacement, locations):
        """Delegate to ``LocationReplace.location_replace`` for ``xml_node``."""
        LocationReplace().location_replace(xml_node, original, replacement, locations)

    def unescape_text(self):
        """Unescape HTML entities in ``self.text``.

        Because of the way we do replace_all(), we need to unescape HTML
        entities.
        """
        # HTMLParser.unescape was deprecated in Python 3.4 and removed in
        # 3.9; html.unescape is its replacement. Fall back to the legacy
        # method only where the html module does not provide it.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape
        self.text = unescape(self.text)

    def replace_all(self, original, replacement):
        """Replace all occurrences of original with replacement.

        This is HTML aware: only the text outside of HTML tags is searched,
        so tag names and attribute values are left untouched. The result,
        with HTML entities unescaped, is stored back into ``self.text``.
        """
        text_chunks = []
        index = 0
        for match in self.HTML_TAG_REGEX.finditer(self.text):
            between_tags = self.text[index : match.start()]
            text_chunks.append(between_tags.replace(original, replacement))
            text_chunks.append(match.group(0))  # the tag itself, unchanged
            index = match.end()
        # Text after the last tag (the whole string when there are no tags)
        # must be processed as well, otherwise trailing occurrences survive.
        text_chunks.append(self.text[index:].replace(original, replacement))
        self.text = "".join(text_chunks)
        self.unescape_text()

    def replace_at(self, original, replacement, locations):
        """Replace the occurrences of original at the given locations.

        ``locations`` is passed on in ascending order; a sorted copy is used
        so the caller's list is not reordered as a side effect. HTML
        entities in the result are unescaped.
        """
        self.text = LocationReplace().location_replace_text(
            self.text, original, replacement, sorted(locations)
        )
        self.unescape_text()

    def apply_layers(self, original_text):
        """Apply every queued layer element to ``original_text``.

        Elements with no locations (``None`` or empty) are replaced
        everywhere; the others only at their listed locations. The queue is
        empty afterwards.

        Returns:
            The transformed text.
        """
        self.text = original_text

        while not self.queue.empty():
            _, layer_element = self.queue.get()
            original, replacement, locations = layer_element

            if not locations:
                self.replace_all(original, replacement)
            else:
                self.replace_at(original, replacement, locations)

        return self.text