Example #1
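Both examples come from NEO's master backup module and are shown without their imports. A minimal sketch of what Example #1 would need, assuming NEO's usual module layout (the exact paths may differ between versions):

import weakref

from neo.lib import logging
from neo.lib.bootstrap import BootstrapManager
from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler
from neo.lib.node import NodeManager
from neo.lib.protocol import ClusterStates, NodeTypes, Packets, \
    uuid_str, ZERO_TID
from neo.lib.util import dump
from neo.master.app import StateChangedException
from neo.master.handlers.backup import BackupHandler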
class BackupApplication(object):

    pt = None
    server = None  # like in BaseApplication
    uuid = None

    def __init__(self, app, name, master_addresses):
        self.app = weakref.proxy(app)
        self.name = name
        self.nm = NodeManager()
        for master_address in master_addresses:
            self.nm.createMaster(address=master_address)

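    # Reuse the event manager and SSL context of the main master application.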
    em = property(lambda self: self.app.em)
    ssl = property(lambda self: self.app.ssl)

    def close(self):
        self.nm.close()
        del self.__dict__

    def setUUID(self, uuid):
        if self.uuid != uuid:
            self.uuid = uuid
            logging.info('Upstream Node ID: %s', uuid_str(uuid))

    def log(self):
        self.nm.log()
        if self.pt is not None:
            self.pt.log()

    def provideService(self):
        logging.info('provide backup')
        poll = self.em.poll
        app = self.app
        pt = app.pt
        while True:
            app.changeClusterState(ClusterStates.STARTING_BACKUP)
            bootstrap = BootstrapManager(self,
                                         NodeTypes.CLIENT,
                                         backup=app.name)
            # {offset -> node}
            self.primary_partition_dict = {}
            # [[tid]]
            self.tid_list = tuple([] for _ in xrange(pt.getPartitions()))
            try:
                while True:
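                    # Wait until every storage node holding readable cells is
                    # ready; the for/else breaks out of the outer loop only
                    # once all of them pass the check.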
                    for node in pt.getNodeSet(readable=True):
                        if not app.isStorageReady(node.getUUID()):
                            break
                    else:
                        break
                    poll(1)
                node, conn = bootstrap.getPrimaryConnection()
                try:
                    app.changeClusterState(ClusterStates.BACKINGUP)
                    del bootstrap, node
                    self.ignore_invalidations = True
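                    # Hand the upstream connection over to the backup handler
                    # and ask for the upstream's last transaction in order to
                    # start following it.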
                    conn.setHandler(BackupHandler(self))
                    conn.ask(Packets.AskLastTransaction())
                    # debug variable to log how big 'tid_list' can be.
                    self.debug_tid_count = 0
                    while True:
                        poll(1)
                except PrimaryFailure, msg:
                    logging.error('upstream master is down: %s', msg)
                finally:
                    app.backup_tid = pt.getBackupTid()
                    try:
                        conn.close()
                    except PrimaryFailure:
                        pass
                    try:
                        del self.pt
                    except AttributeError:
                        pass
                    for node in app.nm.getClientList(True):
                        node.getConnection().close()
            except StateChangedException, e:
                if e.args[0] != ClusterStates.STOPPING_BACKUP:
                    raise
                app.changeClusterState(*e.args)
                tid = app.backup_tid
                # Wait for non-primary partitions to catch up,
                # so that all UP_TO_DATE cells are really UP_TO_DATE.
                # XXX: Another possibility could be to outdate such cells, and
                #      they would be quickly updated at the beginning of the
                #      RUNNING phase. This may simplify code.
                # Any unfinished replication from upstream will be truncated.
                while pt.getBackupTid(min) < tid:
                    poll(1)
                last_tid = app.getLastTransaction()
                handler = EventHandler(app)
                if tid < last_tid:
                    assert tid != ZERO_TID
                    logging.warning("Truncating at %s (last_tid was %s)",
                                    dump(app.backup_tid), dump(last_tid))
                else:
                    # We will do a dummy truncation, just to leave backup mode,
                    # so it's fine to start automatically if there's any
                    # missing storage.
                    # XXX: Consider using another method to leave backup mode,
                    #      at least when there's nothing to truncate. Because
                    #      in case of StoppedOperation during VERIFYING state,
                    #      this flag will be wrongly set to False.
                    app._startup_allowed = True
                # If any error happened before reaching this line, we'd go back
                # to backup mode, which is the right mode to recover.
                del app.backup_tid
                # Now back to RECOVERY...
                return tid
            finally:
                # Drop the per-attempt state before retrying or returning.
                del self.primary_partition_dict, self.tid_list
Example #2
class BackupApplication(object):

    pt = None

    def __init__(self, app, name, master_addresses):
        self.app = weakref.proxy(app)
        self.name = name
        self.nm = NodeManager()
        for master_address in master_addresses:
            self.nm.createMaster(address=master_address)

    em = property(lambda self: self.app.em)
    ssl = property(lambda self: self.app.ssl)

    def close(self):
        self.nm.close()
        del self.__dict__

    def log(self):
        self.nm.log()
        if self.pt is not None:
            self.pt.log()

    def provideService(self):
        logging.info('provide backup')
        poll = self.em.poll
        app = self.app
        pt = app.pt
        while True:
            app.changeClusterState(ClusterStates.STARTING_BACKUP)
            bootstrap = BootstrapManager(self, self.name, NodeTypes.CLIENT)
            # {offset -> node}
            self.primary_partition_dict = {}
            # [[tid]]
            self.tid_list = tuple([] for _ in xrange(pt.getPartitions()))
            try:
                while True:
                    for node in pt.getNodeSet(readable=True):
                        if not app.isStorageReady(node.getUUID()):
                            break
                    else:
                        break
                    poll(1)
                node, conn, uuid, num_partitions, num_replicas = \
                    bootstrap.getPrimaryConnection()
                try:
                    app.changeClusterState(ClusterStates.BACKINGUP)
                    del bootstrap, node
                    if num_partitions != pt.getPartitions():
                        raise RuntimeError("inconsistent number of partitions")
                    self.pt = PartitionTable(num_partitions, num_replicas)
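                    # Mirror the upstream cluster's node list and partition
                    # table before asking for its last transaction.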
                    conn.setHandler(BackupHandler(self))
                    conn.ask(Packets.AskNodeInformation())
                    conn.ask(Packets.AskPartitionTable())
                    conn.ask(Packets.AskLastTransaction())
                    # debug variable to log how big 'tid_list' can be.
                    self.debug_tid_count = 0
                    while True:
                        poll(1)
                except PrimaryFailure, msg:
                    logging.error('upstream master is down: %s', msg)
                finally:
                    app.backup_tid = pt.getBackupTid()
                    try:
                        conn.close()
                    except PrimaryFailure:
                        pass
                    try:
                        del self.pt
                    except AttributeError:
                        pass
            except StateChangedException, e:
                if e.args[0] != ClusterStates.STOPPING_BACKUP:
                    raise
                app.changeClusterState(*e.args)
                tid = app.backup_tid
                # Wait for non-primary partitions to catch up,
                # so that all UP_TO_DATE cells are really UP_TO_DATE.
                # XXX: Another possibility could be to outdate such cells, and
                #      they would be quickly updated at the beginning of the
                #      RUNNING phase. This may simplify code.
                # Any unfinished replication from upstream will be truncated.
                while pt.getBackupTid(min) < tid:
                    poll(1)
                last_tid = app.getLastTransaction()
                handler = EventHandler(app)
                if tid < last_tid:
                    assert tid != ZERO_TID
                    logging.warning("Truncating at %s (last_tid was %s)",
                        dump(app.backup_tid), dump(last_tid))
                else:
                    # We will do a dummy truncation, just to leave backup mode,
                    # so it's fine to start automatically if there's any
                    # missing storage.
                    # XXX: Consider using another method to leave backup mode,
                    #      at least when there's nothing to truncate. Because
                    #      in case of StoppedOperation during VERIFYING state,
                    #      this flag will be wrongly set to False.
                    app._startup_allowed = True
                # If any error happened before reaching this line, we'd go back
                # to backup mode, which is the right mode to recover.
                del app.backup_tid
                # Now back to RECOVERY...
                return tid
            finally:
                # Drop the per-attempt state before retrying or returning.
                del self.primary_partition_dict, self.tid_list
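A minimal usage sketch, purely illustrative: master_app stands for the running master's Application object and the cluster name and address are placeholders, so this will not run outside a configured NEO master.

# Hypothetical wiring inside a master node; names below are placeholders.
backup = BackupApplication(master_app, 'upstream-cluster',
                           [('127.0.0.1', 2100)])
backup.log()                   # dump known upstream nodes (and partition table)
tid = backup.provideService()  # blocks until backup mode is left, then returns
                               # the TID at which to truncate before RECOVERY
backup.close()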