Example #1
    def test_it_gives_up_after_30_seconds(self, now_mock):
        start_datetime = datetime.datetime(2018, 5, 9, 16, 0, 0)
        start_time = time.mktime(start_datetime.timetuple())
        now_mock.configure_mock(return_value=start_time)

        def fail_for_half_a_minute():
            new_time = start_time
            for i in xrange(2):
                # leap forward 15 seconds
                new_time += self.timeout / 2
                now_mock.configure_mock(return_value=new_time)
                yield pgdb.DatabaseError

        self.connect.configure_mock(side_effect=fail_for_half_a_minute())

        reconfigurer = SegmentReconfigurer(logger=self.logger,
                                           worker_pool=self.worker_pool,
                                           timeout=self.timeout)
        with self.assertRaises(RuntimeError) as context:
            reconfigurer.reconfigure()
        self.assertEqual(
            "Mirror promotion did not complete in {0} seconds.".format(
                self.timeout), context.exception.message)

        self.connect.assert_has_calls([
            call(self.db_url),
            call(self.db_url),
        ])
        self.conn.close.assert_not_called()
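
This test pins down two behaviors of SegmentReconfigurer.reconfigure(): it retries the connection while pgdb.DatabaseError keeps being raised, and it gives up with a RuntimeError once timeout seconds have passed (fail_for_half_a_minute advances the mocked clock by timeout / 2 before each failure, so exactly two attempts fit in the window). Below is a minimal sketch of a loop consistent with those assertions; the connect and now parameters are hypothetical injection points, not the real gppylib interface:

import time

class SegmentReconfigurerSketch:
    # Reconstructed from the test's assertions only; the real class in
    # gppylib may be structured differently.
    def __init__(self, logger, db_url, connect, timeout, now=time.time):
        self.logger = logger
        self.db_url = db_url
        self.connect = connect  # hypothetical: the tests patch a module-level connect
        self.timeout = timeout
        self.now = now          # hypothetical: the tests patch a time source

    def reconfigure(self):
        deadline = self.now() + self.timeout
        while True:
            try:
                conn = self.connect(self.db_url)
            except Exception:   # stand-in for pgdb.DatabaseError
                if self.now() >= deadline:
                    raise RuntimeError(
                        "Mirror promotion did not complete in {0} seconds."
                        .format(self.timeout))
            else:
                conn.close()
                return

With the mocked clock above, the first failure lands inside the window and the second lands exactly on the deadline, which matches the two asserted connect calls.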
Example #2
    def test_it_retries_the_connection(self):
        self.connect.configure_mock(side_effect=[pgdb.DatabaseError, pgdb.DatabaseError, self.conn])

        reconfigurer = SegmentReconfigurer(logger=self.logger,
                worker_pool=self.worker_pool, timeout=self.timeout)
        reconfigurer.reconfigure()

        self.connect.assert_has_calls(
            [call(self.db_url), call(self.db_url), call(self.db_url)])
        self.conn.close.assert_any_call()
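
The retry test leans on a convenient unittest.mock feature: when side_effect is a list, each call consumes the next item, and an item that is an exception class or instance is raised instead of returned. A standalone illustration with invented names:

from unittest import mock

connect = mock.Mock(side_effect=[ValueError, ValueError, "connection"])
for attempt in (1, 2, 3):
    try:
        result = connect("db://url")
        print("attempt %d succeeded: %s" % (attempt, result))  # third call returns "connection"
    except ValueError:
        print("attempt %d failed, retrying" % attempt)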
Example #4
    def test_it_triggers_fts_probe(self):
        reconfigurer = SegmentReconfigurer(logger=self.logger,
                worker_pool=self.worker_pool, timeout=self.timeout)
        reconfigurer.reconfigure()
        pg.connect.assert_has_calls([
            call(dbname=self.db, host=self.host, port=self.port, opt=None,
                 user=self.user, passwd=self.passwd),
            call().query(FTS_PROBE_QUERY),
            call().close(),
        ])

    def test_it_triggers_fts_probe(self):
        reconfigurer = SegmentReconfigurer(logger=self.logger,
                worker_pool=self.worker_pool, timeout=self.timeout)
        reconfigurer.reconfigure()
        pygresql.pg.connect.assert_has_calls([
            call(self.db, self.host, self.port, None, self.user, self.passwd),
            call().query(FTS_PROBE_QUERY),
            call().close(),
        ])
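
Both variants assert the same call chain on a mocked PyGreSQL module: connect, then query(FTS_PROBE_QUERY) on the returned connection, then close(); they differ only in whether the connect arguments are asserted as keywords or positionally. Read backwards from those assertions, the code under test plausibly looks like this sketch; the function name and parameter list are invented for illustration:

def trigger_fts_probe(pg_module, db, host, port, user, passwd, probe_query):
    # pg_module stands in for PyGreSQL's pg; probe_query for FTS_PROBE_QUERY.
    conn = pg_module.connect(dbname=db, host=host, port=port,
                             opt=None, user=user, passwd=passwd)
    try:
        conn.query(probe_query)  # ask the cluster to run an FTS probe
    finally:
        conn.close()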
Example #7
    def rebalance(self):
        self.logger.info("Determining primary and mirror segment pairs to rebalance")

        # The current implementation of rebalance calls "gprecoverseg -a" below.
        # Thus, if another balanced pair is not synchronized, or has a down mirror
        # that pair will be recovered as a side-effect of rebalancing.
        unbalanced_primary_segs = []
        for segmentPair in self.gpArray.segmentPairs:
            if segmentPair.balanced():
                continue

            if segmentPair.up() and segmentPair.reachable() and segmentPair.synchronized():
                unbalanced_primary_segs.append(segmentPair.primaryDB)
            else:
                self.logger.warning(
                    "Not rebalancing primary segment dbid %d with its mirror dbid %d because one is either down, unreachable, or not synchronized" \
                    % (segmentPair.primaryDB.dbid, segmentPair.mirrorDB.dbid))

        if not len(unbalanced_primary_segs):
            self.logger.info("No segments to rebalance")
            return True

        unbalanced_primary_segs = GpArray.getSegmentsByHostName(unbalanced_primary_segs)

        pool = base.WorkerPool(min(len(unbalanced_primary_segs), self.batch_size))
        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in list(unbalanced_primary_segs.keys()):
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600,
                                   segment_batch_size=self.segment_batch_size)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the" % failed_count)
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            self.logger.info("=============================START ANOTHER RECOVER=========================================")
            # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
            from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
            cmd_args = ['gprecoverseg', '-a', '-B', str(self.batch_size), '-b', str(self.segment_batch_size)]
            sys.argv = cmd_args[:]
            local_parser = GpRecoverSegmentProgram.createParser()
            local_options, args = local_parser.parse_args()
            recover_cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
            try:
                recover_cmd.run()
            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error(' '.join(cmd_args))
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if recover_cmd:
                    recover_cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex
        finally:
            pool.join()
            pool.haltWork()
            pool.joinWorkers()
            signal.signal(signal.SIGINT, signal.default_int_handler)

        return allSegmentsStopped # if all segments stopped, then a full rebalance was done
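
The synchronization step demonstrates a reusable pattern: invoking another command-line program in-process by swapping sys.argv around its parser, trapping SystemExit so an argument-parser exit cannot kill the caller, and restoring sys.argv in a finally block. A generic sketch of that pattern, with a made-up run_cli_in_process helper standing in for the GpRecoverSegmentProgram wiring above:

import sys

def run_cli_in_process(main, argv):
    # main: a zero-argument callable that reads sys.argv itself, the way
    # GpRecoverSegmentProgram's parser does; argv: the argument vector to
    # present to it, e.g. ['gprecoverseg', '-a'].
    original_argv = sys.argv[:]
    sys.argv = argv[:]
    try:
        return main()
    except SystemExit as e:
        # optparse/argparse signal completion or failure via SystemExit
        if e.code not in (0, None):
            raise RuntimeError("in-process command failed: %r" % (argv,))
    finally:
        sys.argv = original_argv  # always restore, as rebalance() does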
Example #8
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(
            self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)

            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn(
                    "%d segments failed to stop.  A full rebalance of the"
                    % failed_count)
                self.logger.warn(
                    "system is not possible at this time.  Please check the")
                self.logger.warn(
                    "log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info(
                    "gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(
                logger=self.logger,
                worker_pool=pool,
                timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            try:
                self.logger.info(
                    "=============================START ANOTHER RECOVER========================================="
                )
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(
                    local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error(
                        "Failed to start the synchronization step of the segment rebalance."
                    )
                    self.logger.error(
                        "Check the gprecoverseg log file, correct any problems, and re-run"
                    )
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info(
                    "==============================END ANOTHER RECOVER=========================================="
                )

        except Exception as ex:
            raise ex
Example #9
    def rebalance(self):
        # Get the unbalanced primary segments grouped by hostname
        # These segments are what we will shutdown.
        self.logger.info("Getting unbalanced segments")
        unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
        pool = base.WorkerPool()

        try:
            # Disable ctrl-c
            signal.signal(signal.SIGINT, signal.SIG_IGN)

            self.logger.info("Stopping unbalanced primary segments...")
            for hostname in unbalanced_primary_segs.keys():
                cmd = GpSegStopCmd("stop unbalanced primary segs",
                                   self.gpEnv.getGpHome(),
                                   self.gpEnv.getGpVersion(),
                                   'fast',
                                   unbalanced_primary_segs[hostname],
                                   ctxt=base.REMOTE,
                                   remoteHost=hostname,
                                   timeout=600)
                pool.addCommand(cmd)

            base.join_and_indicate_progress(pool)
            
            failed_count = 0
            completed = pool.getCompletedItems()
            for res in completed:
                if not res.get_results().wasSuccessful():
                    failed_count += 1

            allSegmentsStopped = (failed_count == 0)

            if not allSegmentsStopped:
                self.logger.warn("%d segments failed to stop.  A full rebalance of the")
                self.logger.warn("system is not possible at this time.  Please check the")
                self.logger.warn("log files, correct the problem, and run gprecoverseg -r")
                self.logger.warn("again.")
                self.logger.info("gprecoverseg will continue with a partial rebalance.")

            pool.empty_completed_items()
            segment_reconfigurer = SegmentReconfigurer(logger=self.logger,
                    worker_pool=pool, timeout=MIRROR_PROMOTION_TIMEOUT)
            segment_reconfigurer.reconfigure()

            # Final step is to issue a recoverseg operation to resync segments
            self.logger.info("Starting segment synchronization")
            original_sys_args = sys.argv[:]
            try:
                self.logger.info("=============================START ANOTHER RECOVER=========================================")
                # import here because GpRecoverSegmentProgram and GpSegmentRebalanceOperation have a circular dependency
                from gppylib.programs.clsRecoverSegment import GpRecoverSegmentProgram
                sys.argv = ['gprecoverseg', '-a']
                local_parser = GpRecoverSegmentProgram.createParser()
                local_options, args = local_parser.parse_args()
                cmd = GpRecoverSegmentProgram.createProgram(local_options, args)
                cmd.run()

            except SystemExit as e:
                if e.code != 0:
                    self.logger.error("Failed to start the synchronization step of the segment rebalance.")
                    self.logger.error("Check the gprecoverseg log file, correct any problems, and re-run")
                    self.logger.error("'gprecoverseg -a'.")
                    raise Exception("Error synchronizing.\nError: %s" % str(e))
            finally:
                if cmd:
                    cmd.cleanup()
                sys.argv = original_sys_args
                self.logger.info("==============================END ANOTHER RECOVER==========================================")

        except Exception as ex:
            raise ex
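
All three rebalance variants mask Ctrl-C with signal.SIG_IGN while primary segments are being stopped; example 7 restores a handler in its finally block. That guard packages naturally as a context manager. A minimal sketch using only the standard library, restoring whatever handler was previously installed rather than hard-coding the default:

import signal
from contextlib import contextmanager

@contextmanager
def sigint_ignored():
    previous = signal.signal(signal.SIGINT, signal.SIG_IGN)  # disable ctrl-c
    try:
        yield
    finally:
        signal.signal(signal.SIGINT, previous)  # restore the prior handler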