Example #1
 def run(self):
     if servers.is_server_running():
         print 'A MemSQL Loader server is currently running.'
         sys.exit(0)
     else:
         print 'No currently running servers.'
         sys.exit(1)
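These status checks rely on a servers helper module (is_server_running, get_server_pid, write_pid_file) whose implementation is not shown here. A minimal sketch of one common approach, a PID file probed with signal 0, is given below; the file path and function bodies are assumptions for illustration, not the project's actual code.

import errno
import os

PID_FILE = '/tmp/memsql-loader.pid'  # assumed location, for illustration only

def write_pid_file():
    with open(PID_FILE, 'w') as f:
        f.write(str(os.getpid()))

def get_server_pid():
    try:
        with open(PID_FILE) as f:
            return int(f.read().strip())
    except (IOError, ValueError):
        return None

def is_server_running():
    pid = get_server_pid()
    if pid is None:
        return False
    try:
        os.kill(pid, 0)  # signal 0 checks for process existence without killing it
        return True
    except OSError as e:
        return e.errno == errno.EPERM  # process exists but belongs to another user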
Example #2
 def run(self):
     if servers.is_server_running():
         print 'A MemSQL Loader server is currently running.'
         sys.exit(0)
     else:
         print 'No currently running servers.'
         sys.exit(1)
Example #3
 def run(self):
     if not servers.is_server_running():
         print 'No currently running servers'
         sys.exit(0)
     pid = servers.get_server_pid()
     try:
         os.kill(pid, signal.SIGQUIT)
         print 'Stopped server with PID %s' % pid
     except os.error as e:
         print 'Error killing server with PID %s: %s' % (pid, e)
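Note that SIGQUIT is used here because the server process (see Example #5 below) installs its stop method as the handler for SIGINT, SIGQUIT, and SIGTERM, so this triggers a graceful shutdown rather than an abrupt kill.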
Example #4
    def run(self):
        if not self.options.force:
            if servers.is_server_running():
                print 'Please stop any currently-running servers with stop-server before deleting the MemSQL Loader database.'
                sys.exit(1)

        prompt = 'Are you sure you want to delete the MemSQL Loader database?\nThe database contains queued, running, and finished jobs.'
        if not cli_utils.confirm(prompt, default=False):
            print 'Exiting.'
            sys.exit(1)
        LoaderStorage.drop_database()
        print 'MemSQL Loader database deleted.'
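cli_utils.confirm is not shown in these examples. A small sketch of a yes/no prompt with a default answer, which is an assumption about its behavior rather than the actual implementation, might look like this:

def confirm(prompt, default=False):
    # Offer [Y/n] or [y/N] depending on the default, and loop until a clear answer
    suffix = ' [Y/n] ' if default else ' [y/N] '
    while True:
        answer = raw_input(prompt + suffix).strip().lower()
        if not answer:
            return default
        if answer in ('y', 'yes'):
            return True
        if answer in ('n', 'no'):
            return False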
Example #5
    def run(self):
        signal.signal(signal.SIGINT, self.stop)
        signal.signal(signal.SIGQUIT, self.stop)
        signal.signal(signal.SIGTERM, self.stop)

        self.exiting = False
        self.logger = log.get_logger('Server')

        if self.options.num_workers is not None and self.options.num_workers < 1:
            self.logger.error('number of workers must be a positive integer')
            sys.exit(1)

        if self.options.idle_timeout is not None and self.options.idle_timeout < 1:
            self.logger.error('idle timeout must be a positive integer')
            sys.exit(1)

        # switch over to the correct user as soon as possible
        if self.options.set_user is not None:
            if not setuser(self.options.set_user):
                self.logger.error('failed to switch to user %s' %
                                  self.options.set_user)
                sys.exit(1)

        if servers.is_server_running():
            self.logger.error('A MemSQL Loader server is already running.')
            sys.exit(1)

        if self.options.daemonize:
            # ensure connection pool forks from daemon
            pool.close_connections()
            with storage.LoaderStorage.fork_wrapper():
                daemonize(self.options.log_path)
            pool.recreate_pool()

        # record the fact that we've started successfully
        servers.write_pid_file()

        if self.options.num_workers > WORKER_WARN_THRESHOLD and not self.options.force_workers:
            if not cli_utils.confirm(
                    'Are you sure you want to start %d workers? This is potentially dangerous.'
                    % self.options.num_workers,
                    default=False):
                print 'Exiting.'
                sys.exit(1)

        self.logger.debug('Starting worker pool')
        self.pool = WorkerPool(num_workers=self.options.num_workers,
                               idle_timeout=self.options.idle_timeout)

        print 'MemSQL Loader Server running'

        loader_db_name = storage.MEMSQL_LOADER_DB
        has_valid_loader_db_conn = False
        while not self.exiting:
            try:
                if bootstrap.check_bootstrapped():
                    has_valid_loader_db_conn = True
                    if self.pool.poll():
                        time.sleep(1)
                    else:
                        self.logger.info(
                            'Server has been idle for more than the idle timeout (%d seconds). Stopping.',
                            self.options.idle_timeout)
                        self.exit()
                else:
                    if has_valid_loader_db_conn:
                        self.logger.warn(
                            'The %s database is unreachable or not ready; stopping worker pool',
                            loader_db_name)
                        self.pool.stop()
                    has_valid_loader_db_conn = False
                    time.sleep(5)
            except KeyboardInterrupt:
                break

        self.stop()
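daemonize(self.options.log_path) detaches the server from the controlling terminal before the PID file is written. The conventional double-fork technique is sketched below; this is the standard UNIX recipe, not necessarily the exact implementation used by memsql-loader.

import os
import sys

def daemonize(log_path):
    if os.fork() > 0:   # first fork: the parent returns to the shell
        os._exit(0)
    os.setsid()         # start a new session, detaching from the terminal
    if os.fork() > 0:   # second fork: ensure we can never re-acquire a terminal
        os._exit(0)
    log_file = open(log_path, 'a+')
    devnull = open(os.devnull, 'r')
    os.dup2(devnull.fileno(), sys.stdin.fileno())
    os.dup2(log_file.fileno(), sys.stdout.fileno())
    os.dup2(log_file.fileno(), sys.stderr.fileno())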
Example #6
    def queue_job(self):
        all_keys = list(self.job.get_files(s3_conn=self.s3_conn))

        paths = self.job.spec.source.paths

        if self.options.dry_run:
            print "DRY RUN SUMMARY:"
            print "----------------"
            if len(all_keys) == 0:
                print "Paths %s matched no files" % ([str(p) for p in paths])
            else:
                print "List of files to load:"
                for key in all_keys:
                    print key.name
                print "Example LOAD DATA statement to execute:"
                file_id = self.job.get_file_id(all_keys[0])
                print load_data.build_example_query(self.job, file_id)
            sys.exit(0)
        elif len(all_keys) == 0:
            self.logger.warning(
                "Paths %s matched no files. Please check your path specification (be careful with relative paths)."
                % ([str(p) for p in paths]))

        self.jobs = None
        spec = self.job.spec
        try:
            self.logger.info('Creating job')
            self.jobs = Jobs()
            self.jobs.save(self.job)

            self.tasks = Tasks()

            etags = []
            for key in all_keys:
                if key.scheme in ['s3', 'hdfs']:
                    etags.append(key.etag)

            if etags and not self.options.force:
                database, table = spec.target.database, spec.target.table
                host, port = spec.connection.host, spec.connection.port
                competing_job_ids = [
                    j.id for j in self.jobs.query_target(
                        host, port, database, table)
                ]
                md5_map = self.get_current_tasks_md5_map(
                    etags, competing_job_ids)
            else:
                # For files loading on the filesystem, we are not going to MD5 files
                # for performance reasons. We are also basing this on the assumption
                # that filesystem loads are generally a one-time operation.
                md5_map = None
                if self.options.force:
                    self.logger.info(
                        'Loading all files in this job, regardless of identical files that are currently loading or were previously loaded (because of the --force flag)'
                    )
                if self.job.spec.options.file_id_column is not None:
                    self.logger.info(
                        'Since you\'re using file_id_column, duplicate records will be checked and avoided'
                    )

            count = self.submit_files(all_keys, md5_map, self.job,
                                      self.options.force)

            if count == 0:
                self.logger.info('Deleting the job, it has no child tasks')
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s",
                                      self.job.id)
            else:
                self.logger.info("Successfully queued job with id: %s",
                                 self.job.id)

                if not servers.is_server_running():
                    self.start_server()

                if self.options.sync:
                    self.wait_for_job()

        except (Exception, AssertionError):
            self.logger.error(
                'Failed to submit files, attempting to roll back job creation...'
            )
            exc_info = sys.exc_info()
            if self.jobs is not None:
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s",
                                      self.job.id)
            # Have to use this old-style raise because raise just throws
            # the last exception that occurred, which could be the one in
            # the above try/except block and not the original exception.
            raise exc_info[0], exc_info[1], exc_info[2]
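The three-argument raise at the end re-raises the original exception with its original traceback even if the rollback in the except block raises and replaces the "current" exception. Under Python 3 the same pattern is usually written with six.reraise or exception chaining; a short 2/3-compatible illustration, assuming the six library is available, is:

import sys
import six  # assumed dependency, used only for this illustration

def rollback():
    pass  # stand-in for self.jobs.delete(self.job)

try:
    raise ValueError('original failure')
except Exception:
    exc_info = sys.exc_info()
    try:
        rollback()  # may itself raise and overwrite the current exception
    except Exception:
        pass
    six.reraise(*exc_info)  # re-raises the original ValueError with its traceback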
Example #7
    def queue_job(self):
        all_keys = list(self.job.get_files(s3_conn=self.s3_conn))

        paths = self.job.spec.source.paths

        if self.options.dry_run:
            print "DRY RUN SUMMARY:"
            print "----------------"
            if len(all_keys) == 0:
                print "Paths %s matched no files" % ([str(p) for p in paths])
            else:
                print "List of files to load:"
                for key in all_keys:
                    print key.name
                print "Example LOAD DATA statement to execute:"
                file_id = self.job.get_file_id(all_keys[0])
                print load_data.build_example_query(self.job, file_id)
            sys.exit(0)
        elif len(all_keys) == 0:
            self.logger.warning("Paths %s matched no files. Please check your path specification (be careful with relative paths)." % ([str(p) for p in paths]))

        self.jobs = None
        spec = self.job.spec
        try:
            self.logger.info('Creating job')
            self.jobs = Jobs()
            self.jobs.save(self.job)

            self.tasks = Tasks()

            etags = []
            for key in all_keys:
                if key.scheme in ['s3', 'hdfs']:
                    etags.append(key.etag)

            if etags and not self.options.force:
                database, table = spec.target.database, spec.target.table
                host, port = spec.connection.host, spec.connection.port
                competing_job_ids = [j.id for j in self.jobs.query_target(host, port, database, table)]
                md5_map = self.get_current_tasks_md5_map(etags, competing_job_ids)
            else:
                # For files loading on the filesystem, we are not going to MD5 files
                # for performance reasons. We are also basing this on the assumption
                # that filesystem loads are generally a one-time operation.
                md5_map = None
                if self.options.force:
                    self.logger.info('Loading all files in this job, regardless of identical files that are currently loading or were previously loaded (because of the --force flag)')
                if self.job.spec.options.file_id_column is not None:
                    self.logger.info('Since you\'re using file_id_column, duplicate records will be checked and avoided')

            count = self.submit_files(all_keys, md5_map, self.job, self.options.force)

            if count == 0:
                self.logger.info('Deleting the job, it has no child tasks')
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s", self.job.id)
            else:
                self.logger.info("Successfully queued job with id: %s", self.job.id)

                if not servers.is_server_running():
                    self.start_server()

                if self.options.sync:
                    self.wait_for_job()

        except (Exception, AssertionError):
            self.logger.error('Failed to submit files, attempting to roll back job creation...')
            exc_info = sys.exc_info()
            if self.jobs is not None:
                try:
                    self.jobs.delete(self.job)
                except:
                    self.logger.error("Rollback failed for job: %s", self.job.id)
            # Have to use this old-style raise because raise just throws
            # the last exception that occurred, which could be the one in
            # the above try/except block and not the original exception.
            raise exc_info[0], exc_info[1], exc_info[2]
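get_current_tasks_md5_map, which drives the etag-based deduplication above, is also not shown. A rough sketch of the idea, with an assumed task shape (each task row carrying an md5 and a job id), could be:

def get_current_tasks_md5_map(etags, competing_job_ids, tasks):
    # tasks: iterable of objects with .md5 and .job_id attributes (assumed shape)
    wanted = set(etags)
    md5_map = {}
    for task in tasks:
        if task.md5 in wanted and task.job_id in competing_job_ids:
            md5_map.setdefault(task.md5, []).append(task.job_id)
    return md5_map

Files whose etags already appear in such a map can then be skipped by submit_files unless --force is given.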
Example #8
    def run(self):
        signal.signal(signal.SIGINT, self.stop)
        signal.signal(signal.SIGQUIT, self.stop)
        signal.signal(signal.SIGTERM, self.stop)

        self.exiting = False
        self.logger = log.get_logger('Server')

        if self.options.num_workers is not None and self.options.num_workers < 1:
            self.logger.error('number of workers must be a positive integer')
            sys.exit(1)

        if self.options.idle_timeout is not None and self.options.idle_timeout < 1:
            self.logger.error('idle timeout must be a positive integer')
            sys.exit(1)

        # switch over to the correct user as soon as possible
        if self.options.set_user is not None:
            if not setuser(self.options.set_user):
                self.logger.error('failed to switch to user %s' % self.options.set_user)
                sys.exit(1)

        if servers.is_server_running():
            self.logger.error('A MemSQL Loader server is already running.')
            sys.exit(1)

        if self.options.daemonize:
            # ensure connection pool forks from daemon
            pool.close_connections()
            with storage.LoaderStorage.fork_wrapper():
                daemonize(self.options.log_path)
            pool.recreate_pool()

        # record the fact that we've started successfully
        servers.write_pid_file()

        if self.options.num_workers > WORKER_WARN_THRESHOLD and not self.options.force_workers:
            if not cli_utils.confirm('Are you sure you want to start %d workers? This is potentially dangerous.' % self.options.num_workers, default=False):
                print 'Exiting.'
                sys.exit(1)

        self.logger.debug('Starting worker pool')
        self.pool = WorkerPool(num_workers=self.options.num_workers, idle_timeout=self.options.idle_timeout)

        print 'MemSQL Loader Server running'

        loader_db_name = storage.MEMSQL_LOADER_DB
        has_valid_loader_db_conn = False
        while not self.exiting:
            try:
                if bootstrap.check_bootstrapped():
                    has_valid_loader_db_conn = True
                    if self.pool.poll():
                        time.sleep(1)
                    else:
                        self.logger.info('Server has been idle for more than the idle timeout (%d seconds). Stopping.', self.options.idle_timeout)
                        self.exit()
                else:
                    if has_valid_loader_db_conn:
                        self.logger.warn('The %s database is unreachable or not ready; stopping worker pool', loader_db_name)
                        self.pool.stop()
                    has_valid_loader_db_conn = False
                    time.sleep(5)
            except KeyboardInterrupt:
                break

        self.stop()