def run_solr_standalone(hostname: str = None,
                        port: int = MC_SOLR_STANDALONE_PORT,
                        base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                        dist_directory: str = MC_DIST_DIR,
                        solr_version: str = MC_SOLR_VERSION,
                        jvm_heap_size: str = MC_SOLR_STANDALONE_JVM_HEAP_SIZE) -> None:
    """Run standalone instance of Solr, installing Solr first if needed.

    :param hostname: hostname passed on to the Solr instance; when None, fqdn() is used
    :param port: TCP port for the instance; must not be open already
    :param base_data_dir: base data directory (resolved under the Media Cloud root, must exist)
    :param dist_directory: directory with the Solr distribution
    :param solr_version: Solr version to install / run
    :param jvm_heap_size: JVM heap size passed on to __run_solr()
    :raises McSolrRunException: if the port is already open on this machine
    """
    if hostname is None:
        # Resolve on every call: a "hostname=fqdn()" default is evaluated only once, when the
        # function is defined, which forces a hostname lookup at import time and freezes its result
        hostname = fqdn()

    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)
    standalone_data_dir = __standalone_data_dir(base_data_dir=base_data_dir)

    # Fail early, before __run_solr() starts preparing the data directory
    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    l.info("Starting standalone Solr instance on port %d..." % port)
    __run_solr(hostname=hostname,
               port=port,
               instance_data_dir=standalone_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_STANDALONE_JVM_OPTS,
               connect_timeout=MC_SOLR_STANDALONE_CONNECT_RETRIES,
               dist_directory=dist_directory,
               solr_version=solr_version)
def run_solr_standalone(
    hostname: str = None,
    port: int = MC_SOLR_STANDALONE_PORT,
    base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
    jvm_heap_size: str = MC_SOLR_STANDALONE_JVM_HEAP_SIZE,
) -> None:
    """Run standalone instance of Solr, installing Solr first if needed.

    :param hostname: hostname passed on to the Solr instance; when None, fqdn() is used
    :param port: TCP port for the instance; must not be open already
    :param base_data_dir: base data directory (resolved under the Media Cloud root, must exist)
    :param dist_directory: directory with the Solr distribution
    :param solr_version: Solr version to install / run
    :param jvm_heap_size: JVM heap size passed on to __run_solr()
    :raises McSolrRunException: if the port is already open on this machine
    """
    if hostname is None:
        # Resolve on every call: a "hostname=fqdn()" default is evaluated only once, when the
        # function is defined, which forces a hostname lookup at import time and freezes its result
        hostname = fqdn()

    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)
    standalone_data_dir = __standalone_data_dir(base_data_dir=base_data_dir)

    # Fail early, before __run_solr() starts preparing the data directory
    if tcp_port_is_open(port=port):
        # Raise the module's own exception type like the other run_solr_*() helpers do
        # NOTE(review): assumes McSolrRunException derives from Exception so that existing
        # "except Exception" callers keep working -- confirm
        raise McSolrRunException("Port %d is already open on this machine." % port)

    l.info("Starting standalone Solr instance on port %d..." % port)
    __run_solr(
        hostname=hostname,
        port=port,
        instance_data_dir=standalone_data_dir,
        jvm_heap_size=jvm_heap_size,
        jvm_opts=MC_SOLR_STANDALONE_JVM_OPTS,
        connect_timeout=MC_SOLR_STANDALONE_CONNECT_RETRIES,
        dist_directory=dist_directory,
        solr_version=solr_version,
    )
def run_solr_shard(shard_num: int,
                   shard_count: int,
                   hostname: str = None,
                   starting_port: int = MC_SOLR_CLUSTER_STARTING_PORT,
                   base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                   dist_directory: str = MC_DIST_DIR,
                   solr_version: str = MC_SOLR_VERSION,
                   zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
                   zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
                   jvm_heap_size: str = MC_SOLR_CLUSTER_JVM_HEAP_SIZE) -> None:
    """Start a single Solr shard that reads its configuration from ZooKeeper.

    Installs Solr when it is missing, waits for the ZooKeeper port to open and then hands over to
    __run_solr() with the shard's own port and data directory.

    :raises McSolrRunException: if shard_num or shard_count is smaller than 1
    """
    # Both values are 1-based; check them in the same order as they are declared
    lower_bound_checks = (
        (shard_num, "Shard number must be 1 or greater."),
        (shard_count, "Shard count must be 1 or greater."),
    )
    for checked_value, error_message in lower_bound_checks:
        if checked_value < 1:
            raise McSolrRunException(error_message)

    solr_is_present = __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version)
    if not solr_is_present:
        log.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    # No hostname given -- fall back to this machine's FQDN
    hostname = fqdn() if hostname is None else hostname

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    port_for_shard = __shard_port(shard_num=shard_num, starting_port=starting_port)
    data_dir_for_shard = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)

    log.info("Waiting for ZooKeeper to start on %s:%d..." % (zookeeper_host, zookeeper_port))
    wait_for_tcp_port_to_open(
        port=zookeeper_port,
        hostname=zookeeper_host,
        retries=MC_SOLR_CLUSTER_ZOOKEEPER_CONNECT_RETRIES,
    )
    log.info("ZooKeeper is up!")

    log.info("Starting Solr shard %d on port %d..." % (shard_num, port_for_shard))

    # Extra start.jar arguments pointing the shard at ZooKeeper
    # noinspection SpellCheckingInspection
    zookeeper_start_args = []
    zookeeper_start_args.append("-DzkHost=%s:%d" % (zookeeper_host, zookeeper_port))
    zookeeper_start_args.append("-DnumShards=%d" % shard_count)

    __run_solr(
        hostname=hostname,
        port=port_for_shard,
        instance_data_dir=data_dir_for_shard,
        jvm_heap_size=jvm_heap_size,
        jvm_opts=MC_SOLR_CLUSTER_JVM_OPTS,
        start_jar_args=zookeeper_start_args,
        connect_timeout=MC_SOLR_CLUSTER_CONNECT_RETRIES,
        dist_directory=dist_directory,
        solr_version=solr_version,
    )
def run_solr_shard(shard_num: int,
                   shard_count: int,
                   hostname: str = None,
                   starting_port: int = MC_SOLR_CLUSTER_STARTING_PORT,
                   base_data_dir: str = MC_SOLR_BASE_DATA_DIR,
                   dist_directory: str = MC_DIST_DIR,
                   solr_version: str = MC_SOLR_VERSION,
                   zookeeper_host: str = MC_SOLR_CLUSTER_ZOOKEEPER_HOST,
                   zookeeper_port: int = MC_SOLR_CLUSTER_ZOOKEEPER_PORT,
                   jvm_heap_size: str = MC_SOLR_CLUSTER_JVM_HEAP_SIZE) -> None:
    """Run one shard of a Solr cluster.

    Steps: validate the shard numbers, install Solr if it is not there yet, wait until ZooKeeper
    accepts TCP connections, then run the shard via __run_solr().

    :param shard_num: shard number, starting with 1
    :param shard_count: number of shards, 1 or more
    :param hostname: hostname handed to the shard; fqdn() when None
    :raises McSolrRunException: when shard_num or shard_count is below 1
    """
    if shard_num < 1:
        raise McSolrRunException("Shard number must be 1 or greater.")
    if shard_count < 1:
        raise McSolrRunException("Shard count must be 1 or greater.")

    # The same distribution is checked, installed and (further below) run
    distribution = dict(dist_directory=dist_directory, solr_version=solr_version)

    if not __solr_is_installed(**distribution):
        log.info("Solr is not installed, installing...")
        __install_solr(**distribution)

    if hostname is None:
        hostname = fqdn()

    base_data_dir = resolve_absolute_path_under_mc_root(path=base_data_dir, must_exist=True)

    this_shard_port = __shard_port(shard_num=shard_num, starting_port=starting_port)
    this_shard_data_dir = __shard_data_dir(shard_num=shard_num, base_data_dir=base_data_dir)

    zookeeper_address = (zookeeper_host, zookeeper_port)

    log.info("Waiting for ZooKeeper to start on %s:%d..." % zookeeper_address)
    wait_for_tcp_port_to_open(hostname=zookeeper_host,
                              port=zookeeper_port,
                              retries=MC_SOLR_CLUSTER_ZOOKEEPER_CONNECT_RETRIES)
    log.info("ZooKeeper is up!")

    log.info("Starting Solr shard %d on port %d..." % (shard_num, this_shard_port))

    # noinspection SpellCheckingInspection
    cluster_args = ["-DzkHost=%s:%d" % zookeeper_address, "-DnumShards=%d" % shard_count]

    __run_solr(hostname=hostname,
               port=this_shard_port,
               instance_data_dir=this_shard_data_dir,
               jvm_heap_size=jvm_heap_size,
               jvm_opts=MC_SOLR_CLUSTER_JVM_OPTS,
               start_jar_args=cluster_args,
               connect_timeout=MC_SOLR_CLUSTER_CONNECT_RETRIES,
               **distribution)
def __run_solr(port: int,
               instance_data_dir: str,
               hostname: str = None,
               jvm_heap_size: str = None,
               start_jar_args: List[str] = None,
               jvm_opts: List[str] = None,
               connect_timeout: int = 120,
               dist_directory: str = MC_DIST_DIR,
               solr_version: str = MC_SOLR_VERSION) -> None:
    """Run Solr instance.

    Prepares the instance data directory (collection configuration copies, core.properties files,
    symlinks to the shared configuration and to Jetty's libraries), starts the Java process, waits
    for its port to open and then sleeps forever; this function does not return normally.

    :param port: TCP port for Solr to listen on; must not be open already
    :param instance_data_dir: data directory of this instance (created if missing)
    :param hostname: hostname passed to Solr as -Dhost; when None, fqdn() is used
    :param jvm_heap_size: value for -Xmx; the option is left out when None
    :param start_jar_args: extra arguments placed right before "-jar start.jar"
    :param jvm_opts: JVM options; MC_SOLR_STANDALONE_JVM_OPTS when None
    :param connect_timeout: passed as "retries" when waiting for the Solr port to open
    :param dist_directory: directory with the Solr distribution
    :param solr_version: Solr version to install / run
    :raises McSolrRunException: if an expected directory or file is missing, a symlink target is
        occupied by something that is not a symlink, the hostname does not resolve, or the port is
        already open
    """
    if hostname is None:
        # Resolve on every call: a "hostname=fqdn()" default is evaluated only once, when the
        # function is defined, which forces a hostname lookup at import time and freezes its result
        hostname = fqdn()

    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very distribution that is run below, not the default one
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise McSolrRunException("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise McSolrRunException(
                "Configuration for collection '%s' at %s does not exist" %
                (collection_name, collection_conf_src_dir))

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, 'w') as core_properties_file:
            core_properties_file.write("""
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
""" % {
                "collection_name": collection_name,
                "instance_dir": collection_dst_dir,
            })

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = [
        "contexts",
        "etc",
        "modules",
        "resources",
        "solr.xml",
    ]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise McSolrRunException(
                "Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise McSolrRunException(
                    "Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item once: a repeated entry would only unlink and recreate the same symlink again
    library_items_to_symlink = [
        "lib",
        "solr-webapp",
        "start.jar",
        "solr",
    ]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise McSolrRunException(
                "Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise McSolrRunException(
                    "Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # The path has to be interpolated, otherwise the message contains a literal '%s'
        raise McSolrRunException(
            "log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise McSolrRunException("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise McSolrRunException("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise McSolrRunException("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]
    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,

        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,

        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += [
        "-jar", start_jar_path,
        "--module=http",
    ]

    l.debug("Running command: %s" % ' '.join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Stay in the foreground; the handlers registered above take care of stopping Solr
    while True:
        time.sleep(1)
def test_fqdn():
    """fqdn() must return a non-empty hostname which hostname_resolves() accepts."""
    reported_hostname = fqdn()
    assert reported_hostname != ''

    resolves = hostname_resolves(reported_hostname)
    assert resolves is True
from mediawords.util.network import fqdn if __name__ == "__main__": parser = argparse.ArgumentParser(description="Install Solr and start a shard.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) shard_group = parser.add_mutually_exclusive_group(required=True) # Shard number for humans (1, 2, 3, ...) shard_group.add_argument("-n", "--shard_num", type=int, help="Shard number to start (starts with 1).") # Shard index for Supervisor (0, 1, 2, ...) shard_group.add_argument("-i", "--shard_index", type=int, help="Shard index to start (starts with 0).") parser.add_argument("-c", "--shard_count", type=int, required=True, help="Number of shards across the whole cluster.") parser.add_argument("-hn", "--hostname", type=str, required=False, default=fqdn(), help="Server hostname (must be resolveable by other shards).") parser.add_argument("-zh", "--zookeeper_host", type=str, required=False, default=MC_SOLR_CLUSTER_ZOOKEEPER_HOST, help="ZooKeeper host to connect to.") parser.add_argument("-zp", "--zookeeper_port", type=int, required=False, default=MC_SOLR_CLUSTER_ZOOKEEPER_PORT, help="ZooKeeper port to connect to.") parser.add_argument("-mx", "--jvm_heap_size", type=str, required=False, default=MC_SOLR_CLUSTER_JVM_HEAP_SIZE, help="JVM heap size (-Xmx).") args = parser.parse_args() shard_num = args.shard_num if shard_num is None: shard_num = args.shard_index + 1 run_solr_shard(shard_num=shard_num,
def __run_solr(
    port: int,
    instance_data_dir: str,
    hostname: str = None,
    jvm_heap_size: str = None,
    start_jar_args: List[str] = None,
    jvm_opts: List[str] = None,
    connect_timeout: int = 120,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Run Solr instance.

    Prepares the instance data directory (collection configuration copies, core.properties files,
    symlinks to the shared configuration and to Jetty's libraries), starts the Java process, waits
    for its port to open and then sleeps forever; this function does not return normally.

    Failures are reported with McSolrRunException, the exception type used by the other Solr
    runner functions.
    NOTE(review): assumes McSolrRunException derives from Exception so that existing
    "except Exception" callers keep working -- confirm.

    :param port: TCP port for Solr to listen on; must not be open already
    :param instance_data_dir: data directory of this instance (created if missing)
    :param hostname: hostname passed to Solr as -Dhost; when None, fqdn() is used
    :param jvm_heap_size: value for -Xmx; the option is left out when None
    :param start_jar_args: extra arguments placed right before "-jar start.jar"
    :param jvm_opts: JVM options; MC_SOLR_STANDALONE_JVM_OPTS when None
    :param connect_timeout: passed as "retries" when waiting for the Solr port to open
    :param dist_directory: directory with the Solr distribution
    :param solr_version: Solr version to install / run
    :raises McSolrRunException: if an expected directory or file is missing, a symlink target is
        occupied by something that is not a symlink, the hostname does not resolve, or the port is
        already open
    """
    if hostname is None:
        # Resolve on every call: a "hostname=fqdn()" default is evaluated only once, when the
        # function is defined, which forces a hostname lookup at import time and freezes its result
        hostname = fqdn()

    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very distribution that is run below, not the default one
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise McSolrRunException("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise McSolrRunException(
                "Configuration for collection '%s' at %s does not exist"
                % (collection_name, collection_conf_src_dir)
            )

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, "w") as core_properties_file:
            core_properties_file.write(
                """
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
"""
                % {"collection_name": collection_name, "instance_dir": collection_dst_dir}
            )

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = ["contexts", "etc", "modules", "resources", "solr.xml"]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise McSolrRunException(
                "Expected configuration item '%s' does not exist" % config_item_src_path
            )

        # Recreate symlink just in case
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise McSolrRunException(
                    "Configuration item '%s' exists but is not a symlink." % config_item_dst_path
                )
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item once: a repeated entry would only unlink and recreate the same symlink again
    library_items_to_symlink = ["lib", "solr-webapp", "start.jar", "solr"]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise McSolrRunException(
                "Expected library item '%s' does not exist" % library_item_src_path
            )

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise McSolrRunException(
                    "Library item '%s' exists but is not a symlink." % library_item_dst_path
                )
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        # The path has to be interpolated, otherwise the message contains a literal '%s'
        raise McSolrRunException(
            "log4j.properties at '%s' was not found." % log4j_properties_path
        )

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise McSolrRunException("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise McSolrRunException("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise McSolrRunException("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]
    args += jvm_opts

    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,
        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,
        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += ["-jar", start_jar_path, "--module=http"]

    l.debug("Running command: %s" % " ".join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Stay in the foreground; the handlers registered above take care of stopping Solr
    while True:
        time.sleep(1)
import argparse

from mediawords.solr.run.constants import *
from mediawords.solr.run.solr import run_solr_standalone
from mediawords.util.network import fqdn

if __name__ == "__main__":
    # Command-line options of the standalone Solr runner; the defaults are shown in --help
    # because of ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Install Solr and start a standalone instance.",
    )

    # (option flags, add_argument() keyword arguments), in the order they are registered
    option_table = (
        (
            ("-hn", "--hostname"),
            dict(
                type=str,
                required=False,
                default=fqdn(),
                help="Server hostname (must be resolveable by other shards).",
            ),
        ),
        (
            ("-p", "--port"),
            dict(
                type=int,
                required=False,
                default=MC_SOLR_STANDALONE_PORT,
                help="Port.",
            ),
        ),
        (
            ("-mx", "--jvm_heap_size"),
            dict(
                type=str,
                required=False,
                default=MC_SOLR_STANDALONE_JVM_HEAP_SIZE,
                help="JVM heap size (-Xmx).",
            ),
        ),
    )
    for option_flags, option_settings in option_table:
        parser.add_argument(*option_flags, **option_settings)

    args = parser.parse_args()