def __install_solr(dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION) -> None:
    """Install Solr to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is held while Solr is downloaded and extracted, and is
    released on every exit path once it has been acquired (normal completion, the early
    return taken when a concurrent installation finished first, and any exception).

    :param dist_directory: distribution directory to install Solr into
    :param solr_version: Solr version to install
    :raises McSolrRunException: if Solr is already installed, if the extracted webapp is
        not found, or if Solr is still not reported as installed after all steps ran
    """
    if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise McSolrRunException(
            "Solr %s is already installed in distribution directory '%s'." % (solr_version, dist_directory))

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    log.info("Creating Solr directory...")
    mkdir_p(solr_path)

    installing_file_path = __solr_installing_file_path(dist_directory=dist_directory, solr_version=solr_version)

    log.info("Locking Solr directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # From here on the lock is ours; "finally" guarantees it does not outlive this call
    try:
        # Waited for concurrent installation to finish?
        if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
            log.info("While waiting for Solr directory to unlock, Solr got installed to said directory.")
            return

        solr_dist_url = __solr_dist_url(solr_version=solr_version)

        log.info("Downloading Solr %s from %s..." % (solr_version, solr_dist_url))
        solr_tarball_dest_path = download_file_to_temp_path(solr_dist_url)

        log.info("Extracting %s to %s..." % (solr_tarball_dest_path, solr_path))
        extract_tarball_to_directory(archive_file=solr_tarball_dest_path,
                                     dest_directory=solr_path,
                                     strip_root=True)

        # Solr needs its .war extracted first before ZkCLI is usable
        jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)
        solr_war_dest_dir = os.path.join(jetty_home_path, "solr-webapp", "webapp")
        if not os.path.exists(os.path.join(solr_war_dest_dir, "index.html")):
            raise McSolrRunException("Solr's .war is not extracted at path %s" % solr_war_dest_dir)

        log.info("Creating 'installed' file...")
        installed_file_path = __solr_installed_file_path(dist_directory=dist_directory, solr_version=solr_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise McSolrRunException("I've done everything but Solr is still not installed.")
def __install_zookeeper(dist_directory: str = MC_DIST_DIR, zookeeper_version: str = MC_ZOOKEEPER_VERSION) -> None:
    """Install ZooKeeper to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is held while ZooKeeper is downloaded and extracted, and is
    released on every exit path once it has been acquired (normal completion, the early
    return taken when a concurrent installation finished first, and any exception).

    :param dist_directory: distribution directory to install ZooKeeper into
    :param zookeeper_version: ZooKeeper version to install
    :raises McZooKeeperRunException: if ZooKeeper is already installed, or if it is still
        not reported as installed after all steps ran
    """
    if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException(
            "ZooKeeper %s is already installed in distribution directory '%s'." % (zookeeper_version, dist_directory))

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    log.info("Creating ZooKeeper directory...")
    mkdir_p(zookeeper_path)

    installing_file_path = __zookeeper_installing_file_path(dist_directory=dist_directory,
                                                            zookeeper_version=zookeeper_version)

    log.info("Locking ZooKeeper directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # From here on the lock is ours; "finally" guarantees it does not outlive this call
    try:
        # Waited for concurrent installation to finish?
        if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
            log.info("While waiting for ZooKeeper directory to unlock, ZooKeeper got installed to said directory.")
            return

        zookeeper_dist_url = __zookeeper_dist_url(zookeeper_version=zookeeper_version)

        log.info("Downloading ZooKeeper %s from %s..." % (zookeeper_version, zookeeper_dist_url))
        zookeeper_tarball_dest_path = download_file_to_temp_path(source_url=zookeeper_dist_url)

        log.info("Extracting %s to %s..." % (zookeeper_tarball_dest_path, zookeeper_path))
        extract_tarball_to_directory(archive_file=zookeeper_tarball_dest_path,
                                     dest_directory=zookeeper_path,
                                     strip_root=True)

        log.info("Creating 'installed' file...")
        installed_file_path = __zookeeper_installed_file_path(dist_directory=dist_directory,
                                                              zookeeper_version=zookeeper_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException("I've done everything but ZooKeeper is still not installed.")
def test_mkdir_p():
    """mkdir_p() creates a nested directory chain and can be repeated on an existing path."""
    nested_dir = os.path.join(tempfile.mkdtemp(), 'foo', 'bar', 'baz')
    assert not os.path.isdir(nested_dir)

    # First pass creates the whole chain; second pass tries creating it again
    for _ in range(2):
        mc_paths.mkdir_p(nested_dir)
        assert os.path.isdir(nested_dir)
def __install_solr(dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION) -> None:
    """Install Solr to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is held while Solr is downloaded and extracted, and is
    released on every exit path once it has been acquired (normal completion, the early
    return taken when a concurrent installation finished first, and any exception).

    If the extracted distribution does not already contain an unpacked webapp, solr.war is
    extracted into "solr-webapp/webapp" under the Jetty home directory.

    :param dist_directory: distribution directory to install Solr into
    :param solr_version: Solr version to install
    :raises Exception: if Solr is already installed, if solr.war is needed but missing, or
        if Solr is still not reported as installed after all steps ran
    """
    if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise Exception("Solr %s is already installed in distribution directory '%s'." % (solr_version, dist_directory))

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Creating Solr directory...")
    mkdir_p(solr_path)

    installing_file_path = __solr_installing_file_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Locking Solr directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # From here on the lock is ours; "finally" guarantees it does not outlive this call
    try:
        # Waited for concurrent installation to finish?
        if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
            l.info("While waiting for Solr directory to unlock, Solr got installed to said directory.")
            return

        solr_dist_url = __solr_dist_url(solr_version=solr_version)

        l.info("Downloading Solr %s from %s..." % (solr_version, solr_dist_url))
        solr_tarball_dest_path = download_file_to_temp_path(solr_dist_url)

        l.info("Extracting %s to %s..." % (solr_tarball_dest_path, solr_path))
        extract_tarball_to_directory(archive_file=solr_tarball_dest_path,
                                     dest_directory=solr_path,
                                     strip_root=True)

        # Solr needs its .war extracted first before ZkCLI is usable
        jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)
        solr_war_dest_dir = os.path.join(jetty_home_path, "solr-webapp", "webapp")

        # Solr 5.5.2+ already has the .war extracted
        if not os.path.exists(os.path.join(solr_war_dest_dir, "index.html")):
            solr_war_path = os.path.join(jetty_home_path, "webapps", "solr.war")
            if not os.path.isfile(solr_war_path):
                raise Exception("Solr's .war file does not exist at path %s" % solr_war_path)

            l.info("Extracting solr.war at '%s' to '%s'..." % (solr_war_path, solr_war_dest_dir))
            mkdir_p(solr_war_dest_dir)
            extract_zip_to_directory(archive_file=solr_war_path, dest_directory=solr_war_dest_dir)

        l.info("Creating 'installed' file...")
        installed_file_path = __solr_installed_file_path(dist_directory=dist_directory, solr_version=solr_version)
        lock_file(installed_file_path)
    finally:
        l.info("Removing lock file...")
        unlock_file(installing_file_path)

    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise Exception("I've done everything but Solr is still not installed.")
def test_relative_symlink():
    """relative_symlink() produces a working link to a directory located in a sibling subtree."""
    root = tempfile.mkdtemp()

    # Link target: a directory holding a single marker file
    link_target = os.path.join(root, 'src', 'a', 'b', 'c')
    mc_paths.mkdir_p(link_target)
    with open(os.path.join(link_target, 'test.txt'), 'w') as marker:
        marker.write('foo')

    # The link itself lives under a different branch of the temporary tree
    link_parent = os.path.join(root, 'dst', 'd', 'e')
    mc_paths.mkdir_p(link_parent)
    link_path = os.path.join(link_parent, 'f')

    mc_paths.relative_symlink(source=link_target, link_name=link_path)

    for predicate in (os.path.exists, os.path.lexists, os.path.islink):
        assert predicate(link_path)

    # The marker file must be reachable through the link
    assert os.path.exists(os.path.join(link_path, 'test.txt'))
def __install_zookeeper(dist_directory: str = MC_DIST_DIR, zookeeper_version: str = MC_ZOOKEEPER_VERSION) -> None:
    """Install ZooKeeper to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is held while ZooKeeper is downloaded and extracted, and is
    released on every exit path once it has been acquired (normal completion, the early
    return taken when a concurrent installation finished first, and any exception).

    :param dist_directory: distribution directory to install ZooKeeper into
    :param zookeeper_version: ZooKeeper version to install
    :raises McZooKeeperRunException: if ZooKeeper is already installed, or if it is still
        not reported as installed after all steps ran
    """
    if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException("ZooKeeper %s is already installed in distribution directory '%s'." % (
            zookeeper_version,
            dist_directory
        ))

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    log.info("Creating ZooKeeper directory...")
    mkdir_p(zookeeper_path)

    installing_file_path = __zookeeper_installing_file_path(dist_directory=dist_directory,
                                                            zookeeper_version=zookeeper_version)

    log.info("Locking ZooKeeper directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # From here on the lock is ours; "finally" guarantees it does not outlive this call
    try:
        # Waited for concurrent installation to finish?
        if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
            log.info("While waiting for ZooKeeper directory to unlock, ZooKeeper got installed to said directory.")
            return

        zookeeper_dist_url = __zookeeper_dist_url(zookeeper_version=zookeeper_version)

        log.info("Downloading ZooKeeper %s from %s..." % (zookeeper_version, zookeeper_dist_url))
        zookeeper_tarball_dest_path = download_file_to_temp_path(source_url=zookeeper_dist_url)

        log.info("Extracting %s to %s..." % (zookeeper_tarball_dest_path, zookeeper_path))
        extract_tarball_to_directory(archive_file=zookeeper_tarball_dest_path,
                                     dest_directory=zookeeper_path,
                                     strip_root=True)

        log.info("Creating 'installed' file...")
        installed_file_path = __zookeeper_installed_file_path(dist_directory=dist_directory,
                                                              zookeeper_version=zookeeper_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException("I've done everything but ZooKeeper is still not installed.")
def __run_solr(port: int,
               instance_data_dir: str,
               hostname: str = fqdn(),
               jvm_heap_size: str = None,
               start_jar_args: List[str] = None,
               jvm_opts: List[str] = None,
               connect_timeout: int = 120,
               dist_directory: str = MC_DIST_DIR,
               solr_version: str = MC_SOLR_VERSION) -> None:
    """Run Solr instance.

    Installs Solr first if needed, refreshes the per-collection configuration copies and
    the configuration / library symlinks inside the instance data directory, starts the
    JVM as a child process, waits for the port and then sleeps in a loop; this function
    does not return normally.

    NOTE: the "hostname" default is evaluated once, when the function is defined.

    :param port: TCP port Solr should listen on; must not be open already
    :param instance_data_dir: instance data directory (created if missing); also passed to
        the JVM as Jetty base / home and as Solr home / data directory
    :param hostname: hostname passed to Solr via "-Dhost"; must resolve
    :param jvm_heap_size: value for "-Xmx", or None to not pass "-Xmx" at all
    :param start_jar_args: extra arguments inserted before "-jar start.jar"
    :param jvm_opts: JVM options; defaults to MC_SOLR_STANDALONE_JVM_OPTS
    :param connect_timeout: retry count handed to wait_for_tcp_port_to_open()
    :param dist_directory: distribution directory Solr is installed in
    :param solr_version: Solr version to run
    :raises McSolrRunException: if a required directory, file or configuration item is
        missing or has an unexpected type, if the hostname does not resolve, or if the
        port is already open
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very distribution that is going to be run below
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise McSolrRunException("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise McSolrRunException(
                "Configuration for collection '%s' at %s does not exist" % (collection_name, collection_conf_src_dir))

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, 'w') as core_properties_file:
            core_properties_file.write("""
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
""" % {
                "collection_name": collection_name,
                "instance_dir": collection_dst_dir,
            })

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = [
        "contexts",
        "etc",
        "modules",
        "resources",
        "solr.xml",
    ]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise McSolrRunException("Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise McSolrRunException(
                    "Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item once is enough: the link is removed and recreated on every run anyway
    library_items_to_symlink = [
        "lib",
        "solr-webapp",
        "start.jar",
        "solr",
    ]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise McSolrRunException("Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise McSolrRunException("Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McSolrRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise McSolrRunException("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise McSolrRunException("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise McSolrRunException("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]

    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,

        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,

        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += [
        "-jar", start_jar_path,
        "--module=http",
    ]

    l.debug("Running command: %s" % ' '.join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)
    # Keep the parent alive so the signal / atexit handlers stay registered
    while True:
        time.sleep(1)
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts zkServer.sh in the
    foreground as a child process, waits for the port, uploads the initial Solr collection
    configurations and then sleeps in a loop; this function does not return normally.

    :param dist_directory: distribution directory ZooKeeper (and Solr) is installed in
    :param listen: address written to zoo.cfg as "clientPortAddress"
    :param port: port written to zoo.cfg as "clientPort"; must not be open already
    :param data_dir: base data directory; ZooKeeper's own data lives in its
        "mediacloud-cluster-zookeeper" subdirectory
    :param zookeeper_version: ZooKeeper version to run
    :param solr_version: Solr version passed on to update_zookeeper_solr_configuration()
    :raises McZooKeeperRunException: if the port is already open, if zkServer.sh or
        log4j.properties is missing, or if ZooKeeper does not come up on the port
    """
    # Check / install the very distribution that is going to be run below
    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [zkserver_path, "start-foreground"]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)
    # Keep the parent alive so the signal / atexit handlers stay registered
    while True:
        time.sleep(1)
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts zkServer.sh in the
    foreground as a child process, waits for the port, uploads the initial Solr collection
    configurations and then sleeps in a loop; this function does not return normally.

    :param dist_directory: distribution directory ZooKeeper (and Solr) is installed in
    :param listen: address written to zoo.cfg as "clientPortAddress"
    :param port: port written to zoo.cfg as "clientPort"; must not be open already
    :param data_dir: base data directory; ZooKeeper's own data lives in its
        "mediacloud-cluster-zookeeper" subdirectory
    :param zookeeper_version: ZooKeeper version to run
    :param solr_version: Solr version passed on to update_zookeeper_solr_configuration()
    :raises McZooKeeperRunException: if the port is already open, if zkServer.sh or
        log4j.properties is missing, or if ZooKeeper does not come up on the port
    """
    # Check / install the very distribution that is going to be run below
    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
""" % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [
        zkserver_path,
        "start-foreground"
    ]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)
    # Keep the parent alive so the signal / atexit handlers stay registered
    while True:
        time.sleep(1)
def __run_solr(
    port: int,
    instance_data_dir: str,
    hostname: str = fqdn(),
    jvm_heap_size: str = None,
    start_jar_args: List[str] = None,
    jvm_opts: List[str] = None,
    connect_timeout: int = 120,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Run Solr instance.

    Installs Solr first if needed, refreshes the per-collection configuration copies and
    the configuration / library symlinks inside the instance data directory, starts the
    JVM as a child process, waits for the port and then sleeps in a loop; this function
    does not return normally.

    NOTE: the "hostname" default is evaluated once, when the function is defined.

    :param port: TCP port Solr should listen on; must not be open already
    :param instance_data_dir: instance data directory (created if missing); also passed to
        the JVM as Jetty base / home and as Solr home / data directory
    :param hostname: hostname passed to Solr via "-Dhost"; must resolve
    :param jvm_heap_size: value for "-Xmx", or None to not pass "-Xmx" at all
    :param start_jar_args: extra arguments inserted before "-jar start.jar"
    :param jvm_opts: JVM options; defaults to MC_SOLR_STANDALONE_JVM_OPTS
    :param connect_timeout: retry count handed to wait_for_tcp_port_to_open()
    :param dist_directory: distribution directory Solr is installed in
    :param solr_version: Solr version to run
    :raises Exception: if a required directory, file or configuration item is missing or
        has an unexpected type, if the hostname does not resolve, or if the port is
        already open
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very distribution that is going to be run below
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise Exception("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise Exception(
                "Configuration for collection '%s' at %s does not exist" % (collection_name, collection_conf_src_dir)
            )

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, "w") as core_properties_file:
            core_properties_file.write(
                """
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
"""
                % {"collection_name": collection_name, "instance_dir": collection_dst_dir}
            )

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = ["contexts", "etc", "modules", "resources", "solr.xml"]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise Exception("Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise Exception("Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item once is enough: the link is removed and recreated on every run anyway
    library_items_to_symlink = ["lib", "solr-webapp", "start.jar", "solr"]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise Exception("Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise Exception("Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise Exception("log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise Exception("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise Exception("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise Exception("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise Exception("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]

    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,
        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,
        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += ["-jar", start_jar_path, "--module=http"]

    l.debug("Running command: %s" % " ".join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)
    # Keep the parent alive so the signal / atexit handlers stay registered
    while True:
        time.sleep(1)