Пример #1
0
    def delete_remote_wal_before(self, wal_segment, site):
        """Delete the WAL segments that precede ``wal_segment`` from the storage of ``site``.

        Starting from ``wal_segment`` (which itself is not deleted), the method steps back
        one segment at a time on the same timeline and deletes each one. When a segment
        is not found, the same segment number is tried on the previous timeline; the walk
        stops when that attempt is also not found, when the timeline is already 1 or
        lower, or when segment 0 of log 0 has been reached.

        :param wal_segment: WAL segment name, parsed with ``wal.name_to_tli_log_seg``
        :param site: site name used to look up the storage and to build the object key
        """
        self.log.debug("Starting WAL deletion from: %r before: %r", site, wal_segment)
        storage = self.site_transfers.get(site)
        valid_timeline = True
        tli, log, seg = wal.name_to_tli_log_seg(wal_segment)
        while True:
            if valid_timeline:
                # Decrement one segment if we're on a valid timeline
                if seg == 0 and log == 0:
                    break
                seg, log = wal.get_previous_wal_on_same_timeline(seg, log)

            wal_path = os.path.join(self.config.get("path_prefix", ""), site, "xlog",
                                    wal.name_for_tli_log_seg(tli, log, seg))
            self.log.debug("Deleting wal_file: %r", wal_path)
            try:
                storage.delete_key(wal_path)
                valid_timeline = True
            except FileNotFoundFromStorageError:
                if not valid_timeline or tli <= 1:
                    # if we didn't find any WALs to delete on this timeline or we're already at
                    # timeline 1 there's no need or possibility to try older timelines, break.
                    self.log.info("Could not delete wal_file: %r, returning", wal_path)
                    break
                # let's try the same segment number on a previous timeline, but flag that timeline
                # as "invalid" until we're able to delete at least one segment on it.
                valid_timeline = False
                tli -= 1
                self.log.info("Could not delete wal_file: %r, trying the same segment on a previous "
                              "timeline (%s)", wal_path, wal.name_for_tli_log_seg(tli, log, seg))
            except Exception:  # FIXME: still too broad; pylint: disable=broad-except
                # Catch Exception rather than everything so that KeyboardInterrupt and
                # SystemExit are not swallowed and can still stop this loop.
                # NOTE(review): when valid_timeline is False a persistent error here retries
                # the same key without any delay - confirm whether that is acceptable.
                self.log.exception("Problem deleting: %r", wal_path)
Пример #2
0
 def delete_remote_wal_before(self, wal_segment, site, pg_version):
     """Remove the WAL segments that come before ``wal_segment`` from the storage of ``site``.

     The walk goes backwards segment by segment (``wal_segment`` itself is left alone).
     A missing segment makes the method retry the same segment number on the previous
     timeline; it gives up when that retry is missing too, when the timeline is 1 or
     lower, or once segment 0 of log 0 has been reached.

     :param wal_segment: WAL segment name, parsed with ``wal.name_to_tli_log_seg``
     :param site: key into ``self.site_transfers`` and ``self.config["backup_sites"]``
     :param pg_version: passed through to ``wal.get_previous_wal_on_same_timeline``
     """
     self.log.info(
         "Starting WAL deletion from: %r before: %r, pg_version: %r", site, wal_segment, pg_version
     )
     transfer = self.site_transfers.get(site)
     timeline, log_no, seg_no = wal.name_to_tli_log_seg(wal_segment)
     # True while at least one segment has been deleted on the current timeline
     # (or no fallback to an older timeline has happened yet).
     timeline_confirmed = True
     while True:
         if timeline_confirmed:
             # Only step back a segment while the current timeline is confirmed
             if log_no == 0 and seg_no == 0:
                 return
             seg_no, log_no = wal.get_previous_wal_on_same_timeline(seg_no, log_no, pg_version)

         site_prefix = self.config["backup_sites"][site]["prefix"]
         segment_name = wal.name_for_tli_log_seg(timeline, log_no, seg_no)
         wal_path = os.path.join(site_prefix, "xlog", segment_name)
         self.log.debug("Deleting wal_file: %r", wal_path)
         try:
             transfer.delete_key(wal_path)
         except FileNotFoundFromStorageError:
             if timeline_confirmed and timeline > 1:
                 # Retry the same segment number one timeline earlier; that timeline stays
                 # unconfirmed until a deletion on it succeeds.
                 timeline_confirmed = False
                 timeline -= 1
                 fallback_name = wal.name_for_tli_log_seg(timeline, log_no, seg_no)
                 self.log.info("Could not delete wal_file: %r, trying the same segment on a previous "
                               "timeline (%s)", wal_path, fallback_name)
             else:
                 # Nothing was found on this timeline, or there is no older timeline to try.
                 self.log.info("Could not delete wal_file: %r, returning", wal_path)
                 return
         except Exception as ex:  # FIXME: don't catch all exceptions; pylint: disable=broad-except
             self.log.exception("Problem deleting: %r", wal_path)
             self.metrics.unexpected_exception(ex, where="delete_remote_wal_before")
         else:
             timeline_confirmed = True
Пример #3
0
    def run(self):
        """Receive replication messages and buffer them until ``self.running`` becomes false.

        Each loop iteration:

        1. reads at most one replication message and appends its payload to ``self.buffer``;
        2. calls ``switch_xlog()`` when the message belongs to a different WAL name than
           ``self.latest_wal`` or the buffer has reached ``XLOG_SEG_SIZE``;
        3. polls the queues in ``self.callbacks`` for finished transfers;
        4. sends ``flush_lsn`` feedback for completed segments, but only for those that
           are not preceded by a transfer that is still in progress;
        5. when no message was read, waits on the connection and sends a keepalive
           feedback if the wait times out.
        """
        self._init_cursor()
        if self.replication_slot:
            self.create_replication_slot()
        timeline = self.start_replication()
        while self.running:
            wal_name = None
            try:
                msg = self.c.read_message()
            except psycopg2.DatabaseError as ex:
                self.log.exception("Unexpected exception in reading walreceiver msg")
                self.stats.unexpected_exception(ex, where="walreceiver_run")
                continue
            self.log.debug("replication_msg: %r, buffer: %r/%r",
                           msg, self.buffer.tell(), XLOG_SEG_SIZE)
            if msg:
                self.latest_activity = datetime.datetime.utcnow()
                log, _, seg = convert_integer_to_lsn(msg.data_start)
                wal_name = name_for_tli_log_seg(timeline, log, seg)

                if not self.latest_wal:
                    self.latest_wal_start = msg.data_start
                    self.latest_wal = wal_name
                self.buffer.write(msg.payload)

                # TODO: Calculate end pos and transmit that?
                msg.cursor.send_feedback(write_lsn=msg.data_start)

            # Parentheses only make the existing and/or precedence explicit.
            if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= XLOG_SEG_SIZE:
                self.switch_xlog()

            for wal_start, result_queue in self.callbacks.items():
                with suppress(Empty):
                    transfer_result = result_queue.get_nowait()
                    self.log.debug("Transfer result: %r", transfer_result)
                    self.completed_wal_segments.add(wal_start)

            for completed_lsn in sorted(self.completed_wal_segments):
                # A segment that completed on an earlier iteration but could not be flushed
                # yet is still in completed_wal_segments although its callback has already
                # been removed, so the removal must tolerate a missing key.
                self.callbacks.pop(completed_lsn, None)
                if self.callbacks and completed_lsn > min(self.callbacks):
                    continue  # Do nothing since a smaller lsn is still being transferred
                # Either no transfer is on-going or this lsn is earlier than the earliest
                # on-going transfer: advance flush_lsn.
                self.c.send_feedback(flush_lsn=completed_lsn)
                self.completed_wal_segments.discard(completed_lsn)
                self.last_flushed_lsn = completed_lsn
                self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

            if not msg:
                timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
                with suppress(InterruptedError):
                    if not any(select.select([self.c], [], [], max(0, timeout))):
                        self.c.send_feedback()  # timing out, send keepalive
Пример #4
0
    def run(self):
        """Receive replication messages and buffer them until ``self.running`` becomes false.

        Each loop iteration:

        1. reads at most one replication message and appends its payload to ``self.buffer``;
        2. calls ``switch_wal()`` when the message belongs to a different WAL name than
           ``self.latest_wal`` or the buffer has reached ``WAL_SEG_SIZE``;
        3. polls the queues in ``self.callbacks`` for finished transfers;
        4. sends ``flush_lsn`` feedback for completed segments, but only for those that
           are not preceded by a transfer that is still in progress;
        5. when no message was read, waits on the connection and sends a keepalive
           feedback if the wait times out.
        """
        self._init_cursor()
        if self.replication_slot:
            self.create_replication_slot()
        timeline = self.start_replication()
        while self.running:
            wal_name = None
            try:
                msg = self.c.read_message()
            except psycopg2.DatabaseError as ex:
                self.log.exception("Unexpected exception in reading walreceiver msg")
                self.stats.unexpected_exception(ex, where="walreceiver_run")
                continue
            self.log.debug("replication_msg: %r, buffer: %r/%r",
                           msg, self.buffer.tell(), WAL_SEG_SIZE)
            if msg:
                self.latest_activity = datetime.datetime.utcnow()
                log, _, seg = convert_integer_to_lsn(msg.data_start)
                wal_name = name_for_tli_log_seg(timeline, log, seg)

                if not self.latest_wal:
                    self.latest_wal_start = msg.data_start
                    self.latest_wal = wal_name
                self.buffer.write(msg.payload)

                # TODO: Calculate end pos and transmit that?
                msg.cursor.send_feedback(write_lsn=msg.data_start)

            # Parentheses only make the existing and/or precedence explicit.
            if (wal_name and self.latest_wal != wal_name) or self.buffer.tell() >= WAL_SEG_SIZE:
                self.switch_wal()

            for wal_start, result_queue in self.callbacks.items():
                with suppress(Empty):
                    transfer_result = result_queue.get_nowait()
                    self.log.debug("Transfer result: %r", transfer_result)
                    self.completed_wal_segments.add(wal_start)

            for completed_lsn in sorted(self.completed_wal_segments):
                # A segment that completed on an earlier iteration but could not be flushed
                # yet is still in completed_wal_segments although its callback has already
                # been removed, so the removal must tolerate a missing key.
                self.callbacks.pop(completed_lsn, None)
                if self.callbacks and completed_lsn > min(self.callbacks):
                    continue  # Do nothing since a smaller lsn is still being transferred
                # Either no transfer is on-going or this lsn is earlier than the earliest
                # on-going transfer: advance flush_lsn.
                self.c.send_feedback(flush_lsn=completed_lsn)
                self.completed_wal_segments.discard(completed_lsn)
                self.last_flushed_lsn = completed_lsn
                self.log.debug("Sent flush_lsn feedback as: %r", self.last_flushed_lsn)

            if not msg:
                timeout = KEEPALIVE_INTERVAL - (datetime.datetime.now() - self.c.io_timestamp).total_seconds()
                with suppress(InterruptedError):
                    if not any(select.select([self.c], [], [], max(0, timeout))):
                        self.c.send_feedback()  # timing out, send keepalive
Пример #5
0
def test_construct_wal_name():
    """Check ``wal.construct_wal_name`` for timeline "4" at xlogpos "F/190001B0".

    The result must equal both ``wal.name_for_tli_log_seg(4, 0xF, 0x19)`` and the
    literal segment name ``000000040000000F00000019``.
    """
    sysinfo = dict(
        dbname="",
        systemid="6181331723016416192",
        timeline="4",
        xlogpos="F/190001B0",
    )
    constructed = wal.construct_wal_name(sysinfo)
    built_from_parts = wal.name_for_tli_log_seg(4, 0xF, 0x19)
    assert constructed == built_from_parts
    # Called a second time on purpose, mirroring the two independent checks.
    assert wal.construct_wal_name(sysinfo) == "000000040000000F00000019"
Пример #6
0
def test_construct_wal_name():
    """Verify the WAL name built from a system-info mapping.

    For timeline "4" and xlogpos "F/190001B0" the name returned by
    ``wal.construct_wal_name`` must match ``wal.name_for_tli_log_seg(4, 0xF, 0x19)``
    as well as the literal ``000000040000000F00000019``.
    """
    sysinfo = {}
    sysinfo["dbname"] = ""
    sysinfo["systemid"] = "6181331723016416192"
    sysinfo["timeline"] = "4"
    sysinfo["xlogpos"] = "F/190001B0"

    name_from_sysinfo = wal.construct_wal_name(sysinfo)
    name_from_parts = wal.name_for_tli_log_seg(4, 0xF, 0x19)
    assert name_from_sysinfo == name_from_parts

    literal_name = "000000040000000F00000019"
    assert wal.construct_wal_name(sysinfo) == literal_name