Example #1
 def _lock(blocking=True):
     if blocking:
         logging.info('<SimpleQueue>._lock.acquire()')
         while not lock(False):
             Serialized.tic(step=1, quiet=True)
         return True
     return lock(False)
Example #2
 def _askRebaseObject(self, conn, ttid, oid, request_time):
     conflict = self.app.tm.rebaseObject(ttid, oid)
     if request_time and SLOW_STORE is not None:
         duration = time.time() - request_time
         if duration > SLOW_STORE:
             logging.info('RebaseObject delay: %.02fs', duration)
     conn.answer(Packets.AnswerRebaseObject(conflict))
Example #3
File: app.py Project: Nexedi/neoppod
    def provideService(self):
        """
        This is the normal mode for a primary master node. Handle transactions
        and stop the service only if a catastrophe happens or the user commits
        a shutdown.
        """
        logging.info('provide service')
        poll = self.em.poll
        self.changeClusterState(ClusterStates.RUNNING)

        # Now everything is passive.
        try:
            while True:
                poll(1)
        except StateChangedException, e:
            if e.args[0] != ClusterStates.STARTING_BACKUP:
                raise
            self.backup_tid = tid = self.getLastTransaction()
            packet = Packets.StartOperation(True)
            tid_dict = {}
            for node in self.nm.getStorageList(only_identified=True):
                tid_dict[node.getUUID()] = tid
                if node.isRunning():
                    node.notify(packet)
            self.pt.setBackupTidDict(tid_dict)
Example #4
 def tic(cls, step=-1, check_timeout=(), quiet=False):
     # If you're in a pdb here, 'n' switches to another thread
     # (the following lines are not supposed to be debugged into)
     with cls._tic_lock, cls.pdb():
         if not quiet:
             f = sys._getframe(1)
             try:
                 logging.info('tic (%s:%u) ...',
                     f.f_code.co_filename, f.f_lineno)
             finally:
                 del f
         if cls._busy:
             with cls._busy_cond:
                 while cls._busy:
                     cls._busy_cond.wait()
         for app in check_timeout:
             app.em.epoll.check_timeout = True
             app.em.wakeup()
             del app
         while step:
             event_list = cls._epoll.poll(0)
             if not event_list:
                 break
             step -= 1
             event_list.sort(key=cls._sort_key)
             next_lock = cls._sched_lock
             for fd, event in event_list:
                 self = cls._fd_dict[fd]
                 self._release_next = next_lock.release
                 next_lock = self._lock
             del self
             next_lock.release()
             cls._sched_lock.acquire()
Example #5
    def doOperation(self):
        """Handle everything, including replications and transactions."""
        logging.info('doing operation')

        poll = self._poll
        _poll = self.em._poll
        isIdle = self.em.isIdle

        self.master_conn.setHandler(master.MasterOperationHandler(self))
        self.replicator.populate()

        # Forget all unfinished data.
        self.dm.dropUnfinishedData()

        self.task_queue = task_queue = deque()
        try:
            self.dm.doOperation(self)
            while True:
                while task_queue:
                    try:
                        while isIdle():
                            next(task_queue[-1]) or task_queue.rotate()
                            _poll(0)
                        break
                    except StopIteration:
                        task_queue.pop()
                poll()
        finally:
            del self.task_queue
Example #6
File: storage.py Project: Nexedi/neoppod
 def answerHasLock(self, conn, oid, status):
     store_msg_id = self.app.getHandlerData()['timeout_dict'].pop(oid)
     if status == LockState.GRANTED_TO_OTHER:
         # Stop expecting the timed-out store request.
         self.app.dispatcher.forget(conn, store_msg_id)
         # Object is locked by another transaction, and we have waited until
         # timeout. To avoid a deadlock, abort current transaction (we might
         # be locking objects the other transaction is waiting for).
         raise ConflictError, 'Lock wait timeout for oid %s on %r' % (
             dump(oid), conn)
     # HasLock design required that storage is multi-threaded so that
      # it can answer to AskHasLock while processing store requests.
     # This means that the 2 cases (granted to us or nobody) are legitimate,
     # either because it gave us the lock but is/was slow to store our data,
     # or because the storage took a lot of time processing a previous
      # store (and did not even consider our lock request).
     # XXX: But storage nodes are still mono-threaded, so they should
     #      only answer with GRANTED_TO_OTHER (if they reply!), except
     #      maybe in very rare cases of race condition. Only log for now.
     #      This also means that most of the time, if the storage is slow
     #      to process some store requests, HasLock will timeout in turn
     #      and the connector will be closed.
     #      Anyway, it's not clear that HasLock requests are useful.
     #      Are store requests potentially long to process ? If not,
     #      we should simply raise a ConflictError on store timeout.
     logging.info('Store of oid %s delayed (storage overload ?)', dump(oid))
Example #7
 def connectionLost(self, conn, new_state):
     app = self.app
     node = app.nm.getByUUID(conn.getUUID())
     if node is None:
         return  # for example, when a storage is removed by an admin
     assert node.isStorage(), node
     logging.info("storage node lost")
     if new_state != NodeStates.BROKEN:
         new_state = DISCONNECTED_STATE_DICT.get(node.getType(), NodeStates.DOWN)
     assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN, NodeStates.BROKEN), new_state
     assert node.getState() not in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN, NodeStates.BROKEN), (
         uuid_str(self.app.uuid),
         node.whoSetState(),
         new_state,
     )
     was_pending = node.isPending()
     node.setState(new_state)
     if new_state != NodeStates.BROKEN and was_pending:
         # was in pending state, so drop it from the node manager to forget
         # it and do not set in running state when it comes back
         logging.info("drop a pending node from the node manager")
         app.nm.remove(node)
     app.broadcastNodesInformation([node])
     if app.truncate_tid:
         raise StoppedOperation
     app.broadcastPartitionChanges(app.pt.outdate(node))
     if not app.pt.operational():
         raise StoppedOperation
Example #8
File: patch.py Project: sshyran/neoppod
 def iterator(self, start=None, stop=None):
     if start:
         try:
             index = self._tidindex
         except AttributeError:
             logging.info("Building index for faster lookup of"
                          " transactions in the FileStorage DB.")
             # Cache a sorted list of all the file pos from oid index.
              # To reduce memory usage, the list is split into arrays of
             # low order 32-bit words.
             tindex = defaultdict(lambda: array(typecode))
             for x in self._index.itervalues():
                 tindex[x >> 32].append(x & 0xffffffff)
             index = self._tidindex = []
             for h, l in sorted(tindex.iteritems()):
                 l = array(typecode, sorted(l))
                 x = self._read_data_header(h << 32 | l[0])
                 index.append((x.tid, h, l))
             logging.info("... index built")
         x = bisect(index, (start,)) - 1
         if x >= 0:
             x, h, index = index[x]
             x = self._read_data_header
             h = x(h << 32 | index[bisect(index, Start(x, h, start)) - 1])
             return FileIterator(self._file_name, start, stop, h.tloc)
     return FileIterator(self._file_name, start, stop)
Example #9
    def playSecondaryRole(self):
        """
        I play a secondary role, thus only wait for a primary master to fail.
        """
        logging.info('play the secondary role with %r', self.listening_conn)

        # Wait for an announcement. If this is too long, probably
        # the primary master is down.
        # XXX: Same remark as in electPrimary.
        t = time() + 10
        while self.primary_master_node is None:
            self.em.poll(1)
            if t < time():
                # election timeout
                raise ElectionFailure("Election timeout")
        self.master_address_dict.clear()

        # Restart completely. Non-optimized
        # but lower level code needs to be stabilized first.
        for conn in self.em.getConnectionList():
            if not conn.isListening():
                conn.close()

        # Reconnect to primary master node.
        primary_handler = secondary.PrimaryHandler(self)
        ClientConnection(self, primary_handler, self.primary_master_node)

        # and another for the future incoming connections
        self.listening_conn.setHandler(
            identification.SecondaryIdentificationHandler(self))

        while True:
            self.em.poll(1)
Example #10
File: mysqldb.py Project: Nexedi/neoppod
 def _connect(self):
     kwd = {'db' : self.db, 'user' : self.user}
     if self.passwd is not None:
         kwd['passwd'] = self.passwd
     if self.socket:
         kwd['unix_socket'] = os.path.expanduser(self.socket)
     logging.info('connecting to MySQL on the database %s with user %s',
                  self.db, self.user)
     if self._wait < 0:
         timeout_at = None
     else:
         timeout_at = time.time() + self._wait
     while True:
         try:
             self.conn = MySQLdb.connect(**kwd)
             break
         except Exception:
             if timeout_at is not None and time.time() >= timeout_at:
                 raise
             logging.exception('Connection to MySQL failed, retrying.')
             time.sleep(1)
     self._active = 0
     conn = self.conn
     conn.autocommit(False)
     conn.query("SET SESSION group_concat_max_len = %u" % (2**32-1))
     conn.set_sql_mode("TRADITIONAL,NO_ENGINE_SUBSTITUTION")
     conn.query("SHOW VARIABLES WHERE variable_name='max_allowed_packet'")
     r = conn.store_result()
     (name, value), = r.fetch_row(r.num_rows())
     if int(value) < self._max_allowed_packet:
         raise DatabaseFailure("Global variable %r is too small."
             " Minimal value must be %uk."
             % (name, self._max_allowed_packet // 1024))
     self._max_allowed_packet = int(value)
Example #11
 def tic(cls, step=-1, check_timeout=(), quiet=False):
     # If you're in a pdb here, 'n' switches to another thread
     # (the following lines are not supposed to be debugged into)
     with cls._tic_lock, cls.pdb():
         if not quiet:
             f = sys._getframe(1)
             try:
                 logging.info('tic (%s:%u) ...', f.f_code.co_filename,
                              f.f_lineno)
             finally:
                 del f
         if cls._busy:
             with cls._busy_cond:
                 while cls._busy:
                     cls._busy_cond.wait()
         for app in check_timeout:
             app.em.epoll.check_timeout = True
             app.em.wakeup()
             del app
         while step:
             event_list = cls._epoll.poll(0)
             if not event_list:
                 break
             step -= 1
             event_list.sort(key=cls._sort_key)
             next_lock = cls._sched_lock
             for fd, event in event_list:
                 self = cls._fd_dict[fd]
                 self._release_next = next_lock.release
                 next_lock = self._lock
             del self
             next_lock.release()
             cls._sched_lock.acquire()
Example #12
 def connectionLost(self, conn, new_state):
     app = self.app
     node = app.nm.getByUUID(conn.getUUID())
     if node is None:
         return  # for example, when a storage is removed by an admin
     assert node.isStorage(), node
     logging.info('storage node lost')
     if new_state != NodeStates.BROKEN:
         new_state = DISCONNECTED_STATE_DICT.get(node.getType(),
                                                 NodeStates.DOWN)
     assert new_state in (NodeStates.TEMPORARILY_DOWN, NodeStates.DOWN,
                          NodeStates.BROKEN), new_state
     assert node.getState() not in (NodeStates.TEMPORARILY_DOWN,
                                    NodeStates.DOWN,
                                    NodeStates.BROKEN), (uuid_str(
                                        self.app.uuid), node.whoSetState(),
                                                         new_state)
     was_pending = node.isPending()
     node.setState(new_state)
     if new_state != NodeStates.BROKEN and was_pending:
         # was in pending state, so drop it from the node manager to forget
         # it and do not set in running state when it comes back
         logging.info('drop a pending node from the node manager')
         app.nm.remove(node)
     app.broadcastNodesInformation([node])
     if app.truncate_tid:
         raise StoppedOperation
     app.broadcastPartitionChanges(app.pt.outdate(node))
     if not app.pt.operational():
         raise StoppedOperation
Example #13
File: app.py Project: Nexedi/neoppod
    def playSecondaryRole(self):
        """
        I play a secondary role, thus only wait for a primary master to fail.
        """
        logging.info('play the secondary role with %r', self.listening_conn)

        # Wait for an announcement. If this is too long, probably
        # the primary master is down.
        # XXX: Same remark as in electPrimary.
        t = time() + 10
        while self.primary_master_node is None:
            self.em.poll(1)
            if t < time():
                # election timeout
                raise ElectionFailure("Election timeout")
        self.master_address_dict.clear()

        # Restart completely. Non-optimized
        # but lower level code needs to be stabilized first.
        for conn in self.em.getConnectionList():
            if not conn.isListening():
                conn.close()

        # Reconnect to primary master node.
        primary_handler = secondary.PrimaryHandler(self)
        ClientConnection(self, primary_handler, self.primary_master_node)

        # and another for the future incoming connections
        self.listening_conn.setHandler(
            identification.SecondaryIdentificationHandler(self))

        while True:
            self.em.poll(1)
Example #14
 def corrupt(offset):
     s0, s1, s2 = (storage_dict[cell.getUUID()]
         for cell in cluster.master.pt.getCellList(offset, True))
     logging.info('corrupt partition %u of %s',
                  offset, uuid_str(s1.uuid))
     s1.dm.deleteObject(p64(np+offset), p64(corrupt_tid))
     return s0.uuid
Example #15
    def setNodeState(self, conn, uuid, state):
        logging.info("set node state for %s: %s", uuid_str(uuid), state)
        app = self.app
        node = app.nm.getByUUID(uuid)
        if node is None:
            raise ProtocolError('unknown node')
        if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
            raise ProtocolError('can not switch node to this state')
        if uuid == app.uuid:
            raise ProtocolError('can not kill primary master node')

        state_changed = state != node.getState()
        message = ('state changed' if state_changed else
                   'node already in %s state' % state)
        if node.isStorage():
            keep = state == NodeStates.DOWN
            try:
                cell_list = app.pt.dropNodeList([node], keep)
            except PartitionTableException, e:
                raise ProtocolError(str(e))
            node.setState(state)
            if node.isConnected():
                # notify itself so it can shutdown
                node.send(Packets.NotifyNodeInformation(
                    monotonic_time(), [node.asTuple()]))
                # close to avoid handling the closure as a connection loss
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = 'node permanently removed'
            app.broadcastPartitionChanges(cell_list)
Example #16
File: simple.py Project: Nexedi/neoppod
def main():
    args, _, _, defaults = inspect.getargspec(functional.NEOCluster.__init__)
    option_list = zip(args[-len(defaults):], defaults)
    parser = OptionParser(usage="%prog [options] [db...]",
        description="Quickly setup a simple NEO cluster for testing purpose.")
    parser.add_option('--seed', help="settings like node ports/uuids and"
        " cluster name are random: pass any string to initialize the RNG")
    defaults = {}
    for option, default in sorted(option_list):
        kw = {}
        if type(default) is bool:
            kw['action'] = "store_true"
            defaults[option] = False
        elif default is not None:
            defaults[option] = default
            if isinstance(default, int):
                kw['type'] = "int"
        parser.add_option('--' + option, **kw)
    parser.set_defaults(**defaults)
    options, args = parser.parse_args()
    if options.seed:
        functional.random = random.Random(options.seed)
    getLogger().setLevel(INFO)
    cluster = functional.NEOCluster(args, **{x: getattr(options, x)
                                             for x, _ in option_list})
    try:
        cluster.run()
        logging.info("Cluster running ...")
        cluster.waitAll()
    finally:
        cluster.stop()
Example #17
    def provideService(self):
        """
        This is the normal mode for a primary master node. Handle transactions
        and stop the service only if a catastrophe happens or the user commits
        a shutdown.
        """
        logging.info('provide service')
        poll = self.em.poll
        self.changeClusterState(ClusterStates.RUNNING)

        # Now everything is passive.
        try:
            while True:
                poll(1)
        except StateChangedException, e:
            if e.args[0] != ClusterStates.STARTING_BACKUP:
                raise
            self.backup_tid = tid = self.getLastTransaction()
            packet = Packets.StartOperation(True)
            tid_dict = {}
            for node in self.nm.getStorageList(only_identified=True):
                tid_dict[node.getUUID()] = tid
                if node.isRunning():
                    node.notify(packet)
            self.pt.setBackupTidDict(tid_dict)
Example #18
 def answerHasLock(self, conn, oid, status):
     store_msg_id = self.app.getHandlerData()['timeout_dict'].pop(oid)
     if status == LockState.GRANTED_TO_OTHER:
         # Stop expecting the timed-out store request.
         self.app.dispatcher.forget(conn, store_msg_id)
         # Object is locked by another transaction, and we have waited until
         # timeout. To avoid a deadlock, abort current transaction (we might
         # be locking objects the other transaction is waiting for).
         raise ConflictError, 'Lock wait timeout for oid %s on %r' % (
             dump(oid), conn)
     # HasLock design required that storage is multi-threaded so that
      # it can answer to AskHasLock while processing store requests.
     # This means that the 2 cases (granted to us or nobody) are legitimate,
     # either because it gave us the lock but is/was slow to store our data,
     # or because the storage took a lot of time processing a previous
      # store (and did not even consider our lock request).
     # XXX: But storage nodes are still mono-threaded, so they should
     #      only answer with GRANTED_TO_OTHER (if they reply!), except
     #      maybe in very rare cases of race condition. Only log for now.
     #      This also means that most of the time, if the storage is slow
     #      to process some store requests, HasLock will timeout in turn
     #      and the connector will be closed.
     #      Anyway, it's not clear that HasLock requests are useful.
     #      Are store requests potentially long to process ? If not,
     #      we should simply raise a ConflictError on store timeout.
     logging.info('Store of oid %s delayed (storage overload ?)', dump(oid))
Example #19
def main():
    args, _, _, defaults = inspect.getargspec(functional.NEOCluster.__init__)
    option_list = zip(args[-len(defaults):], defaults)
    parser = OptionParser(
        usage="%prog [options] [db...]",
        description="Quickly setup a simple NEO cluster for testing purpose.")
    parser.add_option(
        '--seed',
        help="settings like node ports/uuids and"
        " cluster name are random: pass any string to initialize the RNG")
    defaults = {}
    for option, default in sorted(option_list):
        kw = {}
        if type(default) is bool:
            kw['action'] = "store_true"
            defaults[option] = False
        elif default is not None:
            defaults[option] = default
            if isinstance(default, int):
                kw['type'] = "int"
        parser.add_option('--' + option, **kw)
    parser.set_defaults(**defaults)
    options, args = parser.parse_args()
    if options.seed:
        functional.random = random.Random(options.seed)
    getLogger().setLevel(INFO)
    cluster = functional.NEOCluster(
        args, **{x: getattr(options, x)
                 for x, _ in option_list})
    try:
        cluster.run()
        logging.info("Cluster running ...")
        cluster.waitAll()
    finally:
        cluster.stop()
Example #20
    def sendPartitionTable(self, conn, ptid, row_list):
        app = self.app
        pt = app.pt
        pt.load(ptid, row_list, app.nm)
        if not pt.filled():
            raise ProtocolError('Partial partition table received')
        # Install the partition table into the database for persistence.
        cell_list = []
        offset_list = xrange(pt.getPartitions())
        unassigned_set = set(offset_list)
        for offset in offset_list:
            for cell in pt.getCellList(offset):
                cell_list.append((offset, cell.getUUID(), cell.getState()))
                if cell.getUUID() == app.uuid:
                    unassigned_set.remove(offset)
        # delete objects database
        dm = app.dm
        if unassigned_set:
            if app.disable_drop_partitions:
                logging.info("don't drop data for partitions %r",
                             unassigned_set)
            else:
                logging.debug('drop data for partitions %r', unassigned_set)
                dm.dropPartitions(unassigned_set)

        dm.changePartitionTable(ptid, cell_list, reset=True)
        dm.commit()
Example #21
 def _lock(blocking=True):
     if blocking:
         logging.info('<SimpleQueue>._lock.acquire()')
         while not lock(False):
             Serialized.tic(step=1, quiet=True)
         return True
     return lock(False)
Example #22
    def sendPartitionTable(self, conn, ptid, num_replicas, row_list):
        app = self.app
        pt = app.pt
        pt.load(ptid, num_replicas, row_list, app.nm)
        if not pt.filled():
            raise ProtocolError('Partial partition table received')
        # Install the partition table into the database for persistence.
        cell_list = []
        unassigned = range(pt.getPartitions())
        for offset in reversed(unassigned):
            for cell in pt.getCellList(offset):
                cell_list.append((offset, cell.getUUID(), cell.getState()))
                if cell.getUUID() == app.uuid:
                    unassigned.remove(offset)
        # delete objects database
        dm = app.dm
        if unassigned:
            if app.disable_drop_partitions:
                logging.info(
                    'partitions %r are discarded but actual deletion'
                    ' of data is disabled', unassigned)
            else:
                logging.debug('drop data for partitions %r', unassigned)
                dm.dropPartitions(unassigned)

        dm.changePartitionTable(ptid, num_replicas, cell_list, reset=True)
        dm.commit()
Example #23
    def setNodeState(self, conn, uuid, state):
        logging.info("set node state for %s: %s", uuid_str(uuid), state)
        app = self.app
        node = app.nm.getByUUID(uuid)
        if node is None:
            raise ProtocolError("unknown node")
        if state not in NODE_STATE_WORKFLOW.get(node.getType(), ()):
            raise ProtocolError("can not switch node to this state")
        if uuid == app.uuid:
            raise ProtocolError("can not kill primary master node")

        state_changed = state != node.getState()
        message = "state changed" if state_changed else "node already in %s state" % state
        if node.isStorage():
            keep = state == NodeStates.UNKNOWN
            try:
                cell_list = app.pt.dropNodeList([node], keep)
            except PartitionTableException, e:
                raise ProtocolError(str(e))
            node.setState(state)
            if node.isConnected():
                # notify itself so it can shutdown
                node.notify(Packets.NotifyNodeInformation([node.asTuple()]))
                # close to avoid handling the closure as a connection loss
                node.getConnection().abort()
            if keep:
                cell_list = app.pt.outdate()
            elif cell_list:
                message = "node permanently removed"
            app.broadcastPartitionChanges(cell_list)
Example #24
 def test_18_tweakBigPT(self):
     seed = repr(time.time())
     logging.info("using seed %r", seed)
     sn_count = 11
     sn = [
         self.createStorage(None, i + 1, NodeStates.RUNNING)
         for i in xrange(sn_count)
     ]
     for topo in 0, 1:
         r = random.Random(seed)
         if topo:
             for i, s in enumerate(sn, sn_count):
                 s.devpath = str(i % 5),
         pt = PartitionTable(1000, 2)
         pt.setID(1)
         for offset in xrange(pt.np):
             state = CellStates.UP_TO_DATE
             k = r.randrange(1, sn_count)
             for s in r.sample(sn, k):
                 pt._setCell(offset, s, state)
                 if k * r.random() < 1:
                     state = CellStates.OUT_OF_DATE
         pt.log()
         self.tweak(pt)
         self.update(pt)
Example #25
 def lockObject(self, ttid, serial, oid, unlock=False):
     """
         Take a write lock on given object, checking that "serial" is
         current.
         Raises:
             DelayedError
             ConflictError
     """
      # check if the object is locked
     locking_tid = self._store_lock_dict.get(oid)
     if locking_tid == ttid and unlock:
         logging.info('Deadlock resolution on %r:%r', dump(oid), dump(ttid))
         # A duplicate store means client is resolving a deadlock, so
         # drop the lock it held on this object, and drop object data for
         # consistency.
         del self._store_lock_dict[oid]
         data_id = self._transaction_dict[ttid].delObject(oid)
         if data_id:
             self._app.dm.pruneData((data_id,))
         # Give a chance to pending events to take that lock now.
         self._app.executeQueuedEvents()
          # Attempt to acquire lock again.
         locking_tid = self._store_lock_dict.get(oid)
     if locking_tid is None:
         previous_serial = None
     elif locking_tid == ttid:
         # If previous store was an undo, next store must be based on
         # undo target.
         previous_serial = self._transaction_dict[ttid].getObject(oid)[2]
         if previous_serial is None:
             # XXX: use some special serial when previous store was not
             # an undo ? Maybe it should just not happen.
             logging.info('Transaction %s storing %s more than once',
                          dump(ttid), dump(oid))
     elif locking_tid < ttid:
         # We have a bigger TTID than locking transaction, so we are younger:
         # enter waiting queue so we are handled when lock gets released.
         # We also want to delay (instead of conflict) if the client is
          # so fast that it is committing another transaction before we
         # processed UnlockInformation from the master.
         logging.info('Store delayed for %r:%r by %r', dump(oid),
                 dump(ttid), dump(locking_tid))
         raise DelayedError
     else:
         # We have a smaller TTID than locking transaction, so we are older:
         # this is a possible deadlock case, as we might already hold locks
         # the younger transaction is waiting upon. Make client release
         # locks & reacquire them by notifying it of the possible deadlock.
         logging.info('Possible deadlock on %r:%r with %r',
             dump(oid), dump(ttid), dump(locking_tid))
         raise ConflictError(ZERO_TID)
     if previous_serial is None:
         previous_serial = self._app.dm.getLastObjectTID(oid)
     if previous_serial is not None and previous_serial != serial:
         logging.info('Resolvable conflict on %r:%r',
             dump(oid), dump(ttid))
         raise ConflictError(previous_serial)
     logging.debug('Transaction %s storing %s', dump(ttid), dump(oid))
     self._store_lock_dict[oid] = ttid
Example #26
 def corrupt(offset):
     s0, s1, s2 = (
         storage_dict[cell.getUUID()]
         for cell in cluster.master.pt.getCellList(offset, True))
     logging.info('corrupt partition %u of %s', offset,
                  uuid_str(s1.uuid))
     s1.dm.deleteObject(p64(np + offset), p64(corrupt_tid))
     return s0.uuid
Example #27
 def logDelay(self, ttid, locked, oid_serial):
     if self._delayed.get(oid_serial) != locked:
         if self._delayed:
             self._delayed[oid_serial] = locked
         else:
             self._delayed = {oid_serial: locked}
         logging.info('Lock delayed for %s:%s by %s', dump(oid_serial[0]),
                      dump(ttid), dump(locked))
Example #28
 def __init__(self, addr, s=None):
     logging.info("initializing connector")
     global connector_cpt
     self.desc = connector_cpt
     connector_cpt += 1
     self.packet_cpt = 0
     self.addr = addr
     Mock.__init__(self)
Example #29
 def __init__(self, addr, s=None):
     logging.info("initializing connector")
     global connector_cpt
     self.desc = connector_cpt
     connector_cpt += 1
     self.packet_cpt = 0
     self.addr = addr
     Mock.__init__(self)
Example #30
 def lockObject(self, ttid, serial, oid, unlock=False):
     """
         Take a write lock on given object, checking that "serial" is
         current.
         Raises:
             DelayedError
             ConflictError
     """
      # check if the object is locked
     locking_tid = self._store_lock_dict.get(oid)
     if locking_tid == ttid and unlock:
         logging.info('Deadlock resolution on %r:%r', dump(oid), dump(ttid))
         # A duplicate store means client is resolving a deadlock, so
         # drop the lock it held on this object, and drop object data for
         # consistency.
         del self._store_lock_dict[oid]
         data_id = self._transaction_dict[ttid].delObject(oid)
         if data_id:
             self._app.dm.pruneData((data_id, ))
         # Give a chance to pending events to take that lock now.
         self._app.executeQueuedEvents()
          # Attempt to acquire lock again.
         locking_tid = self._store_lock_dict.get(oid)
     if locking_tid is None:
         previous_serial = None
     elif locking_tid == ttid:
         # If previous store was an undo, next store must be based on
         # undo target.
         previous_serial = self._transaction_dict[ttid].getObject(oid)[2]
         if previous_serial is None:
             # XXX: use some special serial when previous store was not
             # an undo ? Maybe it should just not happen.
             logging.info('Transaction %s storing %s more than once',
                          dump(ttid), dump(oid))
     elif locking_tid < ttid:
         # We have a bigger TTID than locking transaction, so we are younger:
         # enter waiting queue so we are handled when lock gets released.
         # We also want to delay (instead of conflict) if the client is
          # so fast that it is committing another transaction before we
         # processed UnlockInformation from the master.
         logging.info('Store delayed for %r:%r by %r', dump(oid),
                      dump(ttid), dump(locking_tid))
         raise DelayedError
     else:
         # We have a smaller TTID than locking transaction, so we are older:
         # this is a possible deadlock case, as we might already hold locks
         # the younger transaction is waiting upon. Make client release
         # locks & reacquire them by notifying it of the possible deadlock.
         logging.info('Possible deadlock on %r:%r with %r', dump(oid),
                      dump(ttid), dump(locking_tid))
         raise ConflictError(ZERO_TID)
     if previous_serial is None:
         previous_serial = self._app.dm.getLastObjectTID(oid)
     if previous_serial is not None and previous_serial != serial:
         logging.info('Resolvable conflict on %r:%r', dump(oid), dump(ttid))
         raise ConflictError(previous_serial)
     logging.debug('Transaction %s storing %s', dump(ttid), dump(oid))
     self._store_lock_dict[oid] = ttid
Example #31
 def wrapper(*args, **kw):
     s = repr(time.time()) if seed is None else seed
     logging.info("using seed %r", s)
     r = random.Random(s)
     try:
         administration.random = backup_app.random = replicator.random = r
         return wrapped(*args, **kw)
     finally:
         administration.random = backup_app.random = replicator.random = random
Example #32
File: __init__.py Project: pyzh/neoppod
 def _lock(blocking=True):
     if blocking:
         logging.info('<SimpleQueue>._lock.acquire()')
         for i in TIC_LOOP:
             if lock(False):
                 return True
             Serialized.tic(step=1, quiet=True, timeout=.001)
         raise Exception("tic is looping forever")
     return lock(False)
Example #33
 def kill(self, sig=signal.SIGTERM):
     if self.pid:
         logging.info('kill pid %u', self.pid)
         try:
             pdb.kill(self.pid, sig)
         except OSError:
             traceback.print_last()
     else:
         raise AlreadyStopped
Example #34
 def finalize():
     try:
         if data_id_list and not dry_run:
             self.commit()
             logging.info("repair: deleted %s orphan records",
                          self._pruneData(data_id_list))
             self.commit()
     finally:
         l.release()
Example #35
 def _connect(self):
     logging.info('connecting to SQLite database %r', self.db)
     self.conn = sqlite3.connect(self.db, check_same_thread=False)
     self.lock(self.db)
     if self.UNSAFE:
         q = self.query
         q("PRAGMA synchronous = OFF")
         q("PRAGMA journal_mode = MEMORY")
     self._config = {}
Example #36
    def _acceptIdentification(self, node, uuid, num_partitions, num_replicas, your_uuid, primary, known_master_list):
        app = self.app
        if primary != app.primary_master_node.getAddress():
            raise PrimaryFailure("unexpected primary uuid")

        if your_uuid != app.uuid:
            app.uuid = your_uuid
            logging.info("My UUID: " + uuid_str(your_uuid))

        node.setUUID(uuid)
Example #37
 def askHasLock(self, conn, ttid, oid):
     locking_tid = self.app.tm.getLockingTID(oid)
     logging.info('%r check lock of %r:%r', conn, dump(ttid), dump(oid))
     if locking_tid is None:
         state = LockState.NOT_LOCKED
     elif locking_tid is ttid:
         state = LockState.GRANTED
     else:
         state = LockState.GRANTED_TO_OTHER
     conn.answer(Packets.AnswerHasLock(oid, state))
Example #38
 def checkRange(self, conn, *args):
     if self.conn_dict.get(conn, self) != conn.getPeerId():
         # Ignore answers to old requests,
         # because we did nothing to cancel them.
         logging.info("ignored AnswerCheck*Range%r", args)
         return
     self.conn_dict[conn] = args
     answer_set = set(self.conn_dict.itervalues())
     if len(answer_set) > 1:
         for answer in answer_set:
             if type(answer) is not tuple:
                 return
         # TODO: Automatically tell corrupted cells to fix their data
         #       if we know a good source.
         #       For the moment, tell master to put them in CORRUPTED state
         #       and keep up checking if useful.
         uuid = self.app.uuid
         args = None if self.source is None else self.conn_dict[
             None if self.source.getUUID() ==
             uuid else self.source.getConnection()]
         uuid_list = []
         for conn, answer in self.conn_dict.items():
             if answer != args:
                 del self.conn_dict[conn]
                 if conn is None:
                     uuid_list.append(uuid)
                 else:
                     uuid_list.append(conn.getUUID())
                     self.app.closeClient(conn)
         p = Packets.NotifyPartitionCorrupted(self.partition, uuid_list)
         self.app.master_conn.send(p)
         if len(self.conn_dict) <= 1:
             logging.warning("check of partition %u aborted",
                             self.partition)
             self.queue.clear()
             self._nextPartition()
             return
     try:
         count, _, max_tid = args
     except ValueError:  # AnswerCheckSerialRange
         count, _, self.next_tid, _, max_oid = args
         if count < CHECK_COUNT:
             logging.debug("partition %u checked from %s to %s",
                           self.partition, dump(self.min_tid),
                           dump(self.max_tid))
             self._nextPartition()
             return
         self.next_oid = add64(max_oid, 1)
     else:  # AnswerCheckTIDRange
         if count < CHECK_COUNT:
             self.next_tid = self.min_tid
             self.next_oid = ZERO_OID
         else:
             self.next_tid = add64(max_tid, 1)
     self._nextRange()
Example #39
 def cancel(self):
     offset = self.current_partition
     if offset is not None:
         logging.info('cancel replication of partition %u', offset)
         del self.current_partition
         try:
             self.replicate_dict.setdefault(offset, self.replicate_tid)
             del self.replicate_tid
         except AttributeError:
             pass
         self.getCurrentConnection().close()
Example #40
File: handler.py Project: pyzh/neoppod
 def askNodeList(self, conn, node_type):
     if node_type is None:
         node_type = 'all'
         node_filter = None
     else:
         node_filter = lambda n: n.getType() is node_type
     logging.info("ask list of %s nodes", node_type)
     node_list = self.app.nm.getList(node_filter)
     node_information_list = [node.asTuple() for node in node_list]
     p = Packets.AnswerNodeList(node_information_list)
     conn.answer(p)
Example #41
File: __init__.py Project: pyzh/neoppod
 def wrapper(*args, **kw):
     s = repr(time.time()) if seed is None else seed
     logging.info("using seed %r", s)
     r = random.Random(s)
     try:
         administration.random = backup_app.random = replicator.random \
             = r
         return wrapped(*args, **kw)
     finally:
         administration.random = backup_app.random = replicator.random \
             = random
Example #42
    def _setupNode(self, conn, node_type, uuid, address, node):
        app = self.app
        if node_type != NodeTypes.MASTER:
            logging.info('reject a connection from a non-master')
            raise NotReadyError

        if node is None:
            node = app.nm.createMaster(address=address)

        self.elect(conn, address)
        return uuid
Example #43
    def _setupNode(self, conn, node_type, uuid, address, node):
        app = self.app
        if node_type != NodeTypes.MASTER:
            logging.info('reject a connection from a non-master')
            raise NotReadyError

        if node is None:
            node = app.nm.createMaster(address=address)

        self.elect(conn, address)
        return uuid
Example #44
 def cancel(self):
     offset = self.current_partition
     if offset is not None:
         logging.info('cancel replication of partition %u', offset)
         del self.current_partition
         try:
             self.replicate_dict.setdefault(offset, self.replicate_tid)
             del self.replicate_tid
         except AttributeError:
             pass
         self.getCurrentConnection().close()
Example #45
    def _acceptIdentification(self, node, uuid, num_partitions, num_replicas,
                              your_uuid, primary, known_master_list):
        app = self.app
        if primary != app.primary_master_node.getAddress():
            raise PrimaryFailure('unexpected primary uuid')

        if your_uuid != app.uuid:
            app.uuid = your_uuid
            logging.info('My UUID: ' + uuid_str(your_uuid))

        node.setUUID(uuid)
Example #46
File: handler.py Project: Nexedi/neoppod
 def askNodeList(self, conn, node_type):
     if node_type is None:
         node_type = 'all'
         node_filter = None
     else:
         node_filter = lambda n: n.getType() is node_type
     logging.info("ask list of %s nodes", node_type)
     node_list = self.app.nm.getList(node_filter)
     node_information_list = [node.asTuple() for node in node_list ]
     p = Packets.AnswerNodeList(node_information_list)
     conn.answer(p)
Example #47
File: checker.py Project: Nexedi/neoppod
 def checkRange(self, conn, *args):
     if self.conn_dict.get(conn, self) != conn.getPeerId():
         # Ignore answers to old requests,
         # because we did nothing to cancel them.
         logging.info("ignored AnswerCheck*Range%r", args)
         return
     self.conn_dict[conn] = args
     answer_set = set(self.conn_dict.itervalues())
     if len(answer_set) > 1:
         for answer in answer_set:
             if type(answer) is not tuple:
                 return
         # TODO: Automatically tell corrupted cells to fix their data
         #       if we know a good source.
         #       For the moment, tell master to put them in CORRUPTED state
         #       and keep up checking if useful.
         uuid = self.app.uuid
         args = None if self.source is None else self.conn_dict[
             None if self.source.getUUID() == uuid
                  else self.source.getConnection()]
         uuid_list = []
         for conn, answer in self.conn_dict.items():
             if answer != args:
                 del self.conn_dict[conn]
                 if conn is None:
                     uuid_list.append(uuid)
                 else:
                     uuid_list.append(conn.getUUID())
                     self.app.closeClient(conn)
         p = Packets.NotifyPartitionCorrupted(self.partition, uuid_list)
         self.app.master_conn.notify(p)
         if len(self.conn_dict) <= 1:
             logging.warning("check of partition %u aborted", self.partition)
             self.queue.clear()
             self._nextPartition()
             return
     try:
         count, _, max_tid = args
     except ValueError: # AnswerCheckSerialRange
         count, _, self.next_tid, _, max_oid = args
         if count < CHECK_COUNT:
             logging.debug("partition %u checked from %s to %s",
                 self.partition, dump(self.min_tid), dump(self.max_tid))
             self._nextPartition()
             return
         self.next_oid = add64(max_oid, 1)
     else: # AnswerCheckTIDRange
         if count < CHECK_COUNT:
             self.next_tid = self.min_tid
             self.next_oid = ZERO_OID
         else:
             self.next_tid = add64(max_tid, 1)
     self._nextRange()
Example #48
 def deadlock(self, storage_id, ttid, locking_tid):
     try:
         txn = self._ttid_dict[ttid]
     except KeyError:
         return
     if txn.locking_tid <= locking_tid:
         client = txn.getNode()
         txn.locking_tid = locking_tid = self._nextTID()
         logging.info('Deadlock avoidance triggered by %s for %s:'
             ' new locking tid for TXN %s is %s', uuid_str(storage_id),
             uuid_str(client.getUUID()), dump(ttid), dump(locking_tid))
         client.send(Packets.NotifyDeadlock(ttid, locking_tid))
Example #49
File: master.py Project: Nexedi/neoppod
    def _acceptIdentification(self, node, uuid, num_partitions,
            num_replicas, your_uuid, primary, known_master_list):
        app = self.app

        # Register new master nodes.
        found = False
        conn_address = node.getAddress()
        for node_address, node_uuid in known_master_list:
            if node_address == conn_address:
                assert uuid == node_uuid, (dump(uuid), dump(node_uuid))
                found = True
            n = app.nm.getByAddress(node_address)
            if n is None:
                n = app.nm.createMaster(address=node_address)
            if node_uuid is not None and n.getUUID() != node_uuid:
                n.setUUID(node_uuid)
        assert found, (node, dump(uuid), known_master_list)

        conn = node.getConnection()
        if primary is not None:
            primary_node = app.nm.getByAddress(primary)
            if primary_node is None:
                # I don't know such a node. Probably this information
                # is old. So ignore it.
                logging.warning('Unknown primary master: %s. Ignoring.',
                                primary)
                return
            else:
                if app.trying_master_node is not primary_node:
                    app.trying_master_node = None
                    conn.close()
                app.primary_master_node = primary_node
        else:
            if app.primary_master_node is not None:
                # The primary master node is not a primary master node
                # any longer.
                app.primary_master_node = None

            app.trying_master_node = None
            conn.close()
            return

        # the master must give a UUID
        if your_uuid is None:
            raise ProtocolError('No UUID supplied')
        app.uuid = your_uuid
        logging.info('Got an UUID: %s', dump(app.uuid))

        # Always create partition table
        app.pt = PartitionTable(num_partitions, num_replicas)
Example #50
File: app.py Project: Nexedi/neoppod
    def __init__(self, config):
        super(Application, self).__init__(
            config.getSSL(), config.getDynamicMasterList())
        self.tm = TransactionManager(self.onTransactionCommitted)

        self.name = config.getCluster()
        self.server = config.getBind()
        self.autostart = config.getAutostart()

        self.storage_readiness = set()
        for master_address in config.getMasters():
            self.nm.createMaster(address=master_address)

        logging.debug('IP address is %s, port is %d', *self.server)

        # Partition table
        replicas, partitions = config.getReplicas(), config.getPartitions()
        if replicas < 0:
            raise RuntimeError, 'replicas must be a positive integer'
        if partitions <= 0:
            raise RuntimeError, 'partitions must be more than zero'
        self.pt = PartitionTable(partitions, replicas)
        logging.info('Configuration:')
        logging.info('Partitions: %d', partitions)
        logging.info('Replicas  : %d', replicas)
        logging.info('Name      : %s', self.name)

        self.listening_conn = None
        self.primary = None
        self.primary_master_node = None
        self.cluster_state = None

        uuid = config.getUUID()
        if uuid:
            self.uuid = uuid

        # election related data
        self.unconnected_master_node_set = set()
        self.negotiating_master_node_set = set()
        self.master_address_dict = weakref.WeakKeyDictionary()

        self._current_manager = None

        # backup
        upstream_cluster = config.getUpstreamCluster()
        if upstream_cluster:
            if upstream_cluster == self.name:
                raise ValueError("upstream cluster name must be"
                                 " different from cluster name")
            self.backup_app = BackupApplication(self, upstream_cluster,
                                                config.getUpstreamMasters())

        self.administration_handler = administration.AdministrationHandler(
            self)
        self.secondary_master_handler = secondary.SecondaryMasterHandler(self)
        self.client_service_handler = client.ClientServiceHandler(self)
        self.storage_service_handler = storage.StorageServiceHandler(self)

        registerLiveDebugger(on_log=self.log)
Example #51
 def stop(self):
     # Close any open connection to an upstream storage,
     # possibly aborting current replication.
     node = self.current_node
     if node is not None is node.getUUID():
         self.cancel()
     # Cancel all replication orders from upstream cluster.
     for offset in self.replicate_dict.keys():
         addr, name = self.source_dict.get(offset, (None, None))
         if name:
             tid = self.replicate_dict.pop(offset)
             logging.info('cancel replication of partition %u from %r'
                          ' up to %s', offset, addr, dump(tid))
     # Make UP_TO_DATE cells really UP_TO_DATE
     self._nextPartition()
Example #52
 def announcePrimary(self, conn):
     app = self.app
     if app.primary:
         # I am also the primary... So restart the election.
         raise ElectionFailure, 'another primary arises'
     try:
         address = app.master_address_dict[conn]
         assert conn.isServer()
     except KeyError:
         address = conn.getAddress()
         assert conn.isClient()
     app.primary = False
     app.primary_master_node = node = app.nm.getByAddress(address)
     app.negotiating_master_node_set.clear()
     logging.info('%s is the primary', node)
Example #53
    def _setupNode(self, conn, node_type, uuid, address, node):
        app = self.app
        if node:
            if node.isRunning():
                # cloned/evil/buggy node connecting to us
                raise ProtocolError('already connected')
            else:
                assert not node.isConnected()
            node.setAddress(address)
            node.setRunning()

        state = NodeStates.RUNNING
        if node_type == NodeTypes.CLIENT:
            if app.cluster_state != ClusterStates.RUNNING:
                raise NotReadyError
            handler = app.client_service_handler
            human_readable_node_type = ' client '
        elif node_type == NodeTypes.STORAGE:
            if app.cluster_state == ClusterStates.STOPPING_BACKUP:
                raise NotReadyError
            manager = app._current_manager
            if manager is None:
                manager = app
            state, handler = manager.identifyStorageNode(
                uuid is not None and node is not None)
            human_readable_node_type = ' storage (%s) ' % (state, )
        elif node_type == NodeTypes.MASTER:
            handler = app.secondary_master_handler
            human_readable_node_type = ' master '
        elif node_type == NodeTypes.ADMIN:
            handler = app.administration_handler
            human_readable_node_type = 'n admin '
        else:
            raise NotImplementedError(node_type)

        uuid = app.getNewUUID(uuid, address, node_type)
        logging.info('Accept a' + human_readable_node_type + uuid_str(uuid))
        if node is None:
            node = app.nm.createFromNodeType(node_type,
                uuid=uuid, address=address)
        node.setUUID(uuid)
        node.setState(state)
        node.setConnection(conn)
        conn.setHandler(handler)
        app.broadcastNodesInformation([node], node)
        return uuid
Example #54
File: storage.py Project: Nexedi/neoppod
 def answerStoreObject(self, conn, conflicting, oid, serial):
     txn_context = self.app.getHandlerData()
     object_stored_counter_dict = txn_context[
         'object_stored_counter_dict'][oid]
     if conflicting:
         # Warning: if a storage (S1) is much faster than another (S2), then
         # we may process entirely a conflict with S1 (i.e. we received the
         # answer to the store of the resolved object on S1) before we
         # receive the conflict answer from the first store on S2.
         logging.info('%r report a conflict for %r with %r',
                      conn, dump(oid), dump(serial))
         # If this conflict is not already resolved, mark it for
         # resolution.
         if serial not in txn_context[
                 'resolved_conflict_serial_dict'].get(oid, ()):
             if serial in object_stored_counter_dict and serial != ZERO_TID:
                 raise NEOStorageError('Storages %s accepted object %s'
                     ' for serial %s but %s reports a conflict for it.' % (
                     map(dump, object_stored_counter_dict[serial]),
                     dump(oid), dump(serial), dump(conn.getUUID())))
             conflict_serial_dict = txn_context['conflict_serial_dict']
             conflict_serial_dict.setdefault(oid, set()).add(serial)
     else:
         uuid_set = object_stored_counter_dict.get(serial)
         if uuid_set is None: # store to first storage node
             object_stored_counter_dict[serial] = uuid_set = set()
             try:
                 data = txn_context['data_dict'].pop(oid)
             except KeyError: # multiple undo
                 assert txn_context['cache_dict'][oid] is None, oid
             else:
                 if type(data) is str:
                     size = len(data)
                     txn_context['data_size'] -= size
                     size += txn_context['cache_size']
                     if size < self.app._cache._max_size:
                         txn_context['cache_size'] = size
                     else:
                         # Do not cache data past cache max size, as it
                         # would just flush it on tpc_finish. This also
                         # prevents memory errors for big transactions.
                         data = None
                 txn_context['cache_dict'][oid] = data
         else: # replica
             assert oid not in txn_context['data_dict'], oid
         uuid_set.add(conn.getUUID())
Example #55
 def _initNodeConnection(self, node):
     """Init a connection to a given storage node."""
     app = self.app
     logging.debug('trying to connect to %s - %s', node, node.getState())
     conn = MTClientConnection(app, app.storage_event_handler, node,
                               dispatcher=app.dispatcher)
     p = Packets.RequestIdentification(NodeTypes.CLIENT,
         app.uuid, None, app.name)
     try:
         app._ask(conn, p, handler=app.storage_bootstrap_handler)
     except ConnectionClosed:
         logging.error('Connection to %r failed', node)
     except NodeNotReady:
         logging.info('%r not ready', node)
     else:
         logging.info('Connected %r', node)
         return conn
     self.notifyFailure(node)
Example #56
 def notifyNodeInformation(self, conn, node_list):
     """Store information on nodes, only if this is sent by a primary
     master node."""
     self.app.nm.update(node_list)
     for node_type, addr, uuid, state in node_list:
         if uuid == self.app.uuid:
              # This is me, do what the master tells me
             logging.info("I was told I'm %s", state)
             if state in (NodeStates.DOWN, NodeStates.TEMPORARILY_DOWN,
                     NodeStates.BROKEN, NodeStates.UNKNOWN):
                 erase = state == NodeStates.DOWN
                 self.app.shutdown(erase=erase)
             elif state == NodeStates.HIDDEN:
                 raise StoppedOperation
         elif node_type == NodeTypes.CLIENT and state != NodeStates.RUNNING:
             logging.info('Notified of non-running client, abort (%s)',
                     uuid_str(uuid))
             self.app.tm.abortFor(uuid)
Example #57
File: mysqldb.py Project: Nexedi/neoppod
 def query(self, query):
     """Query data from a database."""
     if LOG_QUERIES:
         logging.debug('querying %s...',
             getPrintableQuery(query.split('\n', 1)[0][:70]))
     while 1:
         conn = self.conn
         try:
             conn.query(query)
             if query.startswith("SELECT "):
                 r = conn.store_result()
                 return tuple([
                     tuple([d.tostring() if isinstance(d, array) else d
                           for d in row])
                     for row in r.fetch_row(r.num_rows())])
             break
         except OperationalError, m:
             if self._active or m[0] not in (SERVER_GONE_ERROR, SERVER_LOST):
                 raise DatabaseFailure('MySQL error %d: %s\nQuery: %s'
                     % (m[0], m[1], getPrintableQuery(query[:1000])))
             logging.info('the MySQL server is gone; reconnecting')
             self._connect()
Example #58
File: app.py Project: Nexedi/neoppod
    def shutdown(self):
        """Close all connections and exit"""
        self.changeClusterState(ClusterStates.STOPPING)
        self.listening_conn.close()
        for conn in self.em.getConnectionList():
            node = self.nm.getByUUID(conn.getUUID())
            if node is None or not node.isIdentified():
                conn.close()
        # No need to change handlers in order to reject RequestIdentification
        # & AskBeginTransaction packets because there won't be any:
        # the only remaining connected peers are identified non-clients
        # and we don't accept new connections anymore.
        try:
            # wait for all transactions to be finished
            while self.tm.hasPending():
                self.em.poll(1)
        except StoppedOperation:
            logging.critical('No longer operational')

        logging.info("asking remaining nodes to shutdown")
        handler = EventHandler(self)
        for node in self.nm.getConnectedList():
            conn = node.getConnection()
            if node.isStorage():
                conn.setHandler(handler)
                conn.notify(Packets.NotifyNodeInformation(((
                  node.getType(), node.getAddress(), node.getUUID(),
                  NodeStates.TEMPORARILY_DOWN),)))
                conn.abort()
            elif conn.pending():
                conn.abort()
            else:
                conn.close()

        while self.em.connection_dict:
            self.em.poll(1)

        # then shutdown
        sys.exit()