def run(self):
    if servers.is_server_running():
        print 'A MemSQL Loader server is currently running.'
        sys.exit(0)
    else:
        print 'No currently running servers.'
        sys.exit(1)
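# A minimal sketch of the PID-file check that servers.is_server_running()
# could be performing above; the file location and details are assumptions,
# not the actual memsql-loader implementation.
import errno
import os

PID_FILE = '/tmp/memsql-loader.pid'  # assumed location

def is_server_running(pid_file=PID_FILE):
    try:
        with open(pid_file) as f:
            pid = int(f.read().strip())
    except (IOError, ValueError):
        return False  # no PID file, or unreadable contents
    try:
        os.kill(pid, 0)  # signal 0 probes the process without affecting it
        return True
    except OSError as e:
        # ESRCH means no such process: the PID file is stale.
        # EPERM means the process exists but belongs to another user.
        return e.errno != errno.ESRCH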
def run(self):
    if not servers.is_server_running():
        print 'No currently running servers.'
        sys.exit(0)
    pid = servers.get_server_pid()
    try:
        os.kill(pid, signal.SIGQUIT)
        print 'Stopped server with PID %s' % pid
    except os.error as e:
        print 'Error killing server with PID %s: %s' % (pid, e)
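# stop-server works because the server (see run() below) installs a handler
# for SIGQUIT that flips its exiting flag. A minimal sketch of such a
# handler, assuming the main loop checks self.exiting as run() does:
def stop(self, signum=None, frame=None):
    # Setting the flag lets the `while not self.exiting` loop finish its
    # current iteration and shut down cleanly instead of dying mid-task.
    self.exiting = True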
def run(self):
    if not self.options.force:
        if servers.is_server_running():
            print 'Please stop any currently-running servers with stop-server before deleting the MemSQL Loader database.'
            sys.exit(1)
        prompt = ('Are you sure you want to delete the MemSQL Loader database?\n'
                  'The database contains queued, running, and finished jobs.')
        if not cli_utils.confirm(prompt, default=False):
            print 'Exiting.'
            sys.exit(1)
    LoaderStorage.drop_database()
    print 'MemSQL Loader database deleted.'
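# cli_utils.confirm() is used above as a yes/no prompt with a default
# answer. A minimal Python 2 sketch of that behavior; the exact prompt
# format is an assumption, not the real memsql-loader helper.
def confirm(prompt, default=False):
    suffix = ' [Y/n] ' if default else ' [y/N] '
    while True:
        answer = raw_input(prompt + suffix).strip().lower()
        if not answer:
            return default  # bare Enter takes the default
        if answer in ('y', 'yes'):
            return True
        if answer in ('n', 'no'):
            return False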
def run(self):
    signal.signal(signal.SIGINT, self.stop)
    signal.signal(signal.SIGQUIT, self.stop)
    signal.signal(signal.SIGTERM, self.stop)

    self.exiting = False
    self.logger = log.get_logger('Server')

    if self.options.num_workers is not None and self.options.num_workers < 1:
        self.logger.error('number of workers must be a positive integer')
        sys.exit(1)
    if self.options.idle_timeout is not None and self.options.idle_timeout < 1:
        self.logger.error('idle timeout must be a positive integer')
        sys.exit(1)

    # switch over to the correct user as soon as possible
    if self.options.set_user is not None:
        if not setuser(self.options.set_user):
            self.logger.error('failed to switch to user %s' % self.options.set_user)
            sys.exit(1)

    if servers.is_server_running():
        self.logger.error('A MemSQL Loader server is already running.')
        sys.exit(1)

    if self.options.daemonize:
        # ensure connection pool forks from daemon
        pool.close_connections()
        with storage.LoaderStorage.fork_wrapper():
            daemonize(self.options.log_path)
        pool.recreate_pool()

    # record the fact that we've started successfully
    servers.write_pid_file()

    if self.options.num_workers > WORKER_WARN_THRESHOLD and not self.options.force_workers:
        if not cli_utils.confirm(
                'Are you sure you want to start %d workers? This is potentially dangerous.'
                % self.options.num_workers, default=False):
            print 'Exiting.'
            sys.exit(1)

    self.logger.debug('Starting worker pool')
    self.pool = WorkerPool(
        num_workers=self.options.num_workers,
        idle_timeout=self.options.idle_timeout)

    print 'MemSQL Loader Server running'

    loader_db_name = storage.MEMSQL_LOADER_DB
    has_valid_loader_db_conn = False
    while not self.exiting:
        try:
            if bootstrap.check_bootstrapped():
                has_valid_loader_db_conn = True
                if self.pool.poll():
                    time.sleep(1)
                else:
                    self.logger.info(
                        'Server has been idle for more than the idle timeout '
                        '(%d seconds). Stopping.', self.options.idle_timeout)
                    self.exit()
            else:
                if has_valid_loader_db_conn:
                    self.logger.warn(
                        'The %s database is unreachable or not ready; '
                        'stopping worker pool', loader_db_name)
                    self.pool.stop()
                    has_valid_loader_db_conn = False
                time.sleep(5)
        except KeyboardInterrupt:
            break

    self.stop()
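# daemonize() above detaches the server process from the controlling
# terminal before the PID file is written. A minimal sketch of the classic
# double-fork pattern it implies; the redirection details are assumptions
# about, not a copy of, the real implementation.
import os
import sys

def daemonize(log_path):
    if os.fork() > 0:
        os._exit(0)  # parent returns to the shell
    os.setsid()      # become session leader; drop the controlling tty
    if os.fork() > 0:
        os._exit(0)  # first child exits; the grandchild is the daemon
    # Redirect stdin to /dev/null and stdout/stderr to the log file.
    devnull = open(os.devnull, 'r')
    log_file = open(log_path, 'a+')
    os.dup2(devnull.fileno(), sys.stdin.fileno())
    os.dup2(log_file.fileno(), sys.stdout.fileno())
    os.dup2(log_file.fileno(), sys.stderr.fileno())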
def queue_job(self):
    all_keys = list(self.job.get_files(s3_conn=self.s3_conn))
    paths = self.job.spec.source.paths

    if self.options.dry_run:
        print "DRY RUN SUMMARY:"
        print "----------------"
        if len(all_keys) == 0:
            print "Paths %s matched no files" % ([str(p) for p in paths])
        else:
            print "List of files to load:"
            for key in all_keys:
                print key.name
            print "Example LOAD DATA statement to execute:"
            file_id = self.job.get_file_id(all_keys[0])
            print load_data.build_example_query(self.job, file_id)
        sys.exit(0)
    elif len(all_keys) == 0:
        self.logger.warning(
            "Paths %s matched no files. Please check your path specification "
            "(be careful with relative paths)." % ([str(p) for p in paths]))

    self.jobs = None
    spec = self.job.spec
    try:
        self.logger.info('Creating job')
        self.jobs = Jobs()
        self.jobs.save(self.job)

        self.tasks = Tasks()

        etags = []
        for key in all_keys:
            if key.scheme in ['s3', 'hdfs']:
                etags.append(key.etag)
        if etags and not self.options.force:
            database, table = spec.target.database, spec.target.table
            host, port = spec.connection.host, spec.connection.port
            competing_job_ids = [
                j.id for j in self.jobs.query_target(host, port, database, table)]
            md5_map = self.get_current_tasks_md5_map(etags, competing_job_ids)
        else:
            # For files loading on the filesystem, we are not going to MD5 files
            # for performance reasons. We are also basing this on the assumption
            # that filesystem loads are generally a one-time operation.
            md5_map = None

        if self.options.force:
            self.logger.info(
                'Loading all files in this job, regardless of identical files '
                'that are currently loading or were previously loaded '
                '(because of the --force flag)')
            if self.job.spec.options.file_id_column is not None:
                self.logger.info(
                    "Since you're using file_id_column, duplicate records "
                    "will be checked and avoided")

        count = self.submit_files(all_keys, md5_map, self.job, self.options.force)

        if count == 0:
            self.logger.info('Deleting the job, it has no child tasks')
            try:
                self.jobs.delete(self.job)
            except:
                self.logger.error("Rollback failed for job: %s", self.job.id)
        else:
            self.logger.info("Successfully queued job with id: %s", self.job.id)
            if not servers.is_server_running():
                self.start_server()
            if self.options.sync:
                self.wait_for_job()
    except (Exception, AssertionError):
        self.logger.error('Failed to submit files, attempting to roll back job creation...')
        exc_info = sys.exc_info()
        if self.jobs is not None:
            try:
                self.jobs.delete(self.job)
            except:
                self.logger.error("Rollback failed for job: %s", self.job.id)
        # Have to use this old-style raise because raise just throws
        # the last exception that occurred, which could be the one in
        # the above try/except block and not the original exception.
        raise exc_info[0], exc_info[1], exc_info[2]
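# The md5_map built in queue_job() lets submit_files() skip S3/HDFS files
# whose ETag already appears among a competing job's tasks. A minimal
# sketch of that dedup decision; should_submit() is a hypothetical helper,
# not part of the real submit_files():
def should_submit(key, md5_map, force):
    if force or md5_map is None:
        # --force and filesystem loads bypass dedup entirely
        return True
    # skip files already queued, loading, or loaded with the same ETag
    return key.etag not in md5_map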