Пример #1
0
def __install_solr(dist_directory: str = MC_DIST_DIR,
                   solr_version: str = MC_SOLR_VERSION) -> None:
    """Install Solr to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is released on every exit path once it has been
    acquired (successful install, concurrent install detected, or failure), so
    a failed attempt does not leave a stale lock behind for the next installer.

    :param dist_directory: distribution directory under which Solr gets installed.
    :param solr_version: Solr version to download and install.
    :raises McSolrRunException: if Solr is already installed, if the extracted
        distribution does not contain the unpacked webapp, or if Solr is still
        not reported as installed after all steps have run.
    """
    if __solr_is_installed(dist_directory=dist_directory,
                           solr_version=solr_version):
        raise McSolrRunException(
            "Solr %s is already installed in distribution directory '%s'." %
            (solr_version, dist_directory))

    solr_path = __solr_path(dist_directory=dist_directory,
                            solr_version=solr_version)

    log.info("Creating Solr directory...")
    mkdir_p(solr_path)

    installing_file_path = __solr_installing_file_path(
        dist_directory=dist_directory, solr_version=solr_version)

    log.info("Locking Solr directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # The lock is held from here on; "finally" guarantees it gets released even
    # when we return early or one of the installation steps raises.
    try:
        # Waited for concurrent installation to finish?
        if __solr_is_installed(dist_directory=dist_directory,
                               solr_version=solr_version):
            log.info(
                "While waiting for Solr directory to unlock, Solr got installed to said directory."
            )
            return

        solr_dist_url = __solr_dist_url(solr_version=solr_version)

        log.info("Downloading Solr %s from %s..." %
                 (solr_version, solr_dist_url))
        solr_tarball_dest_path = download_file_to_temp_path(solr_dist_url)

        log.info("Extracting %s to %s..." %
                 (solr_tarball_dest_path, solr_path))
        extract_tarball_to_directory(archive_file=solr_tarball_dest_path,
                                     dest_directory=solr_path,
                                     strip_root=True)

        # Solr needs its .war extracted first before ZkCLI is usable
        jetty_home_path = __jetty_home_path(dist_directory=dist_directory,
                                            solr_version=solr_version)

        solr_war_dest_dir = os.path.join(jetty_home_path, "solr-webapp",
                                         "webapp")
        if not os.path.exists(os.path.join(solr_war_dest_dir, "index.html")):
            raise McSolrRunException(
                "Solr's .war is not extracted at path %s" % solr_war_dest_dir)

        log.info("Creating 'installed' file...")
        installed_file_path = __solr_installed_file_path(
            dist_directory=dist_directory, solr_version=solr_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    # Sanity check: the marker file and the extracted tree should now be there
    if not __solr_is_installed(dist_directory=dist_directory,
                               solr_version=solr_version):
        raise McSolrRunException(
            "I've done everything but Solr is still not installed.")
Пример #2
0
def __install_zookeeper(dist_directory: str = MC_DIST_DIR,
                        zookeeper_version: str = MC_ZOOKEEPER_VERSION) -> None:
    """Install ZooKeeper to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is released on every exit path once it has been
    acquired (successful install, concurrent install detected, or failure), so
    a failed attempt does not leave a stale lock behind for the next installer.

    :param dist_directory: distribution directory under which ZooKeeper gets installed.
    :param zookeeper_version: ZooKeeper version to download and install.
    :raises McZooKeeperRunException: if ZooKeeper is already installed, or if it
        is still not reported as installed after all steps have run.
    """
    if __zookeeper_is_installed(dist_directory=dist_directory,
                                zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException(
            "ZooKeeper %s is already installed in distribution directory '%s'."
            % (zookeeper_version, dist_directory))

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory,
                                      zookeeper_version=zookeeper_version)

    log.info("Creating ZooKeeper directory...")
    mkdir_p(zookeeper_path)

    installing_file_path = __zookeeper_installing_file_path(
        dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    log.info("Locking ZooKeeper directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # The lock is held from here on; "finally" guarantees it gets released even
    # when we return early or one of the installation steps raises.
    try:
        # Waited for concurrent installation to finish?
        if __zookeeper_is_installed(dist_directory=dist_directory,
                                    zookeeper_version=zookeeper_version):
            log.info(
                "While waiting for ZooKeeper directory to unlock, ZooKeeper got installed to said directory."
            )
            return

        zookeeper_dist_url = __zookeeper_dist_url(
            zookeeper_version=zookeeper_version)

        log.info("Downloading ZooKeeper %s from %s..." %
                 (zookeeper_version, zookeeper_dist_url))
        zookeeper_tarball_dest_path = download_file_to_temp_path(
            source_url=zookeeper_dist_url)

        log.info("Extracting %s to %s..." %
                 (zookeeper_tarball_dest_path, zookeeper_path))
        extract_tarball_to_directory(archive_file=zookeeper_tarball_dest_path,
                                     dest_directory=zookeeper_path,
                                     strip_root=True)

        log.info("Creating 'installed' file...")
        installed_file_path = __zookeeper_installed_file_path(
            dist_directory=dist_directory, zookeeper_version=zookeeper_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    # Sanity check: the installation should now be detectable
    if not __zookeeper_is_installed(dist_directory=dist_directory,
                                    zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException(
            "I've done everything but ZooKeeper is still not installed.")
Пример #3
0
def test_mkdir_p():
    """mkdir_p() creates a nested directory tree and tolerates an existing one."""
    # TemporaryDirectory removes the scratch tree when the block exits, so the
    # test no longer leaves directories behind in the system temp location.
    with tempfile.TemporaryDirectory() as temp_dir:
        test_dir = os.path.join(temp_dir, 'foo', 'bar', 'baz')
        assert not os.path.isdir(test_dir)

        mc_paths.mkdir_p(test_dir)
        assert os.path.isdir(test_dir)

        # Try creating again; an already existing directory must not be an error
        mc_paths.mkdir_p(test_dir)
        assert os.path.isdir(test_dir)
Пример #4
0
def __install_solr(dist_directory: str = MC_DIST_DIR, solr_version: str = MC_SOLR_VERSION) -> None:
    """Install Solr to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is released on every exit path once it has been
    acquired (successful install, concurrent install detected, or failure), so
    a failed attempt does not leave a stale lock behind for the next installer.

    :param dist_directory: distribution directory under which Solr gets installed.
    :param solr_version: Solr version to download and install.
    :raises Exception: if Solr is already installed, if solr.war is neither
        pre-extracted nor present in the distribution, or if Solr is still not
        reported as installed after all steps have run.
    """
    if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise Exception("Solr %s is already installed in distribution directory '%s'." % (solr_version, dist_directory))

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Creating Solr directory...")
    mkdir_p(solr_path)

    installing_file_path = __solr_installing_file_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Locking Solr directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # The lock is held from here on; "finally" guarantees it gets released even
    # when we return early or one of the installation steps raises.
    try:
        # Waited for concurrent installation to finish?
        if __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
            l.info("While waiting for Solr directory to unlock, Solr got installed to said directory.")
            return

        solr_dist_url = __solr_dist_url(solr_version=solr_version)

        l.info("Downloading Solr %s from %s..." % (solr_version, solr_dist_url))
        solr_tarball_dest_path = download_file_to_temp_path(solr_dist_url)

        l.info("Extracting %s to %s..." % (solr_tarball_dest_path, solr_path))
        extract_tarball_to_directory(archive_file=solr_tarball_dest_path, dest_directory=solr_path, strip_root=True)

        # Solr needs its .war extracted first before ZkCLI is usable
        jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)
        solr_war_dest_dir = os.path.join(jetty_home_path, "solr-webapp", "webapp")

        # Solr 5.5.2+ already has the .war extracted
        if not os.path.exists(os.path.join(solr_war_dest_dir, "index.html")):
            solr_war_path = os.path.join(jetty_home_path, "webapps", "solr.war")
            if not os.path.isfile(solr_war_path):
                raise Exception("Solr's .war file does not exist at path %s" % solr_war_path)

            l.info("Extracting solr.war at '%s' to '%s'..." % (solr_war_path, solr_war_dest_dir))
            mkdir_p(solr_war_dest_dir)
            extract_zip_to_directory(archive_file=solr_war_path, dest_directory=solr_war_dest_dir)

        l.info("Creating 'installed' file...")
        installed_file_path = __solr_installed_file_path(dist_directory=dist_directory, solr_version=solr_version)
        lock_file(installed_file_path)
    finally:
        l.info("Removing lock file...")
        unlock_file(installing_file_path)

    # Sanity check: the installation should now be detectable
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        raise Exception("I've done everything but Solr is still not installed.")
Пример #5
0
def test_relative_symlink():
    """relative_symlink() creates a working symlink between two sibling directory trees."""
    # TemporaryDirectory removes the scratch tree (symlink included) when the
    # block exits, so the test no longer leaves files behind.
    with tempfile.TemporaryDirectory() as temp_dir:
        source_dir = os.path.join(temp_dir, 'src', 'a', 'b', 'c')
        mc_paths.mkdir_p(source_dir)
        with open(os.path.join(source_dir, 'test.txt'), 'w') as fh:
            fh.write('foo')

        dest_dir = os.path.join(temp_dir, 'dst', 'd', 'e')
        mc_paths.mkdir_p(dest_dir)
        dest_symlink = os.path.join(dest_dir, 'f')

        mc_paths.relative_symlink(source=source_dir, link_name=dest_symlink)

        # exists() follows the link (target must resolve), lexists() / islink()
        # inspect the link itself
        assert os.path.exists(dest_symlink)
        assert os.path.lexists(dest_symlink)
        assert os.path.islink(dest_symlink)

        # File created in the source directory must be reachable through the link
        assert os.path.exists(os.path.join(dest_symlink, 'test.txt'))
Пример #6
0
def __install_zookeeper(dist_directory: str = MC_DIST_DIR, zookeeper_version: str = MC_ZOOKEEPER_VERSION) -> None:
    """Install ZooKeeper to distribution directory; lock directory before installing and unlock afterwards.

    The "installing" lock file is released on every exit path once it has been
    acquired (successful install, concurrent install detected, or failure), so
    a failed attempt does not leave a stale lock behind for the next installer.

    :param dist_directory: distribution directory under which ZooKeeper gets installed.
    :param zookeeper_version: ZooKeeper version to download and install.
    :raises McZooKeeperRunException: if ZooKeeper is already installed, or if it
        is still not reported as installed after all steps have run.
    """
    if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException("ZooKeeper %s is already installed in distribution directory '%s'." % (
            zookeeper_version, dist_directory
        ))

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    log.info("Creating ZooKeeper directory...")
    mkdir_p(zookeeper_path)

    installing_file_path = __zookeeper_installing_file_path(dist_directory=dist_directory,
                                                            zookeeper_version=zookeeper_version)

    log.info("Locking ZooKeeper directory for installation...")
    lock_file(installing_file_path, timeout=MC_INSTALL_TIMEOUT)

    # The lock is held from here on; "finally" guarantees it gets released even
    # when we return early or one of the installation steps raises.
    try:
        # Waited for concurrent installation to finish?
        if __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
            log.info("While waiting for ZooKeeper directory to unlock, ZooKeeper got installed to said directory.")
            return

        zookeeper_dist_url = __zookeeper_dist_url(zookeeper_version=zookeeper_version)

        log.info("Downloading ZooKeeper %s from %s..." % (zookeeper_version, zookeeper_dist_url))
        zookeeper_tarball_dest_path = download_file_to_temp_path(source_url=zookeeper_dist_url)

        log.info("Extracting %s to %s..." % (zookeeper_tarball_dest_path, zookeeper_path))
        extract_tarball_to_directory(archive_file=zookeeper_tarball_dest_path,
                                     dest_directory=zookeeper_path,
                                     strip_root=True)

        log.info("Creating 'installed' file...")
        installed_file_path = __zookeeper_installed_file_path(dist_directory=dist_directory,
                                                              zookeeper_version=zookeeper_version)
        lock_file(installed_file_path)
    finally:
        log.info("Removing lock file...")
        unlock_file(installing_file_path)

    # Sanity check: the installation should now be detectable
    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        raise McZooKeeperRunException("I've done everything but ZooKeeper is still not installed.")
Пример #7
0
def __run_solr(port: int,
               instance_data_dir: str,
               hostname: str = fqdn(),
               jvm_heap_size: str = None,
               start_jar_args: List[str] = None,
               jvm_opts: List[str] = None,
               connect_timeout: int = 120,
               dist_directory: str = MC_DIST_DIR,
               solr_version: str = MC_SOLR_VERSION) -> None:
    """Run Solr instance, installing Solr first if it is not installed yet.

    Prepares the instance data directory (copies collection configuration,
    writes core.properties, symlinks shared configuration and Jetty libraries),
    starts the JVM as a child process and then blocks forever; the child gets
    cleaned up through the registered SIGTERM / atexit handler.

    :param port: TCP port for Jetty to listen on; must not be open already.
    :param instance_data_dir: directory used as Jetty base / home, Solr home
        and Solr data directory; created if missing.
    :param hostname: hostname passed to Solr as "-Dhost"; must resolve.
    :param jvm_heap_size: value for the JVM's "-Xmx" option; omitted if None.
    :param start_jar_args: extra arguments placed right before "-jar start.jar".
    :param jvm_opts: JVM options; defaults to MC_SOLR_STANDALONE_JVM_OPTS.
    :param connect_timeout: passed as "retries" when waiting for the port to open.
    :param dist_directory: distribution directory Solr is installed under.
    :param solr_version: Solr version to run.
    :raises McSolrRunException: if a required directory, file or configuration
        item is missing, a symlink destination is a regular file / directory,
        the hostname does not resolve, or the port is already open.
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very same distribution that is going to be run below
    # instead of silently falling back to the default directory and version
    if not __solr_is_installed(dist_directory=dist_directory,
                               solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory,
                       solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise McSolrRunException("Solr home directory '%s' does not exist." %
                                 solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory,
                            solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise McSolrRunException(
                "Configuration for collection '%s' at %s does not exist" %
                (collection_name, collection_conf_src_dir))

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." %
                    collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." %
               (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir,
                        collection_conf_dst_dir,
                        symlinks=False)

        l.info("Updating core.properties for collection '%s'..." %
               collection_name)
        core_properties_path = os.path.join(collection_dst_dir,
                                            "core.properties")
        with open(core_properties_path, 'w') as core_properties_file:
            core_properties_file.write(
                """
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
""" % {
                    "collection_name": collection_name,
                    "instance_dir": collection_dst_dir,
                })

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = [
        "contexts",
        "etc",
        "modules",
        "resources",
        "solr.xml",
    ]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise McSolrRunException(
                "Expected configuration item '%s' does not exist" %
                config_item_src_path)

        # Recreate symlink just in case; refuse to delete anything that is not
        # a symlink so that real files / directories never get clobbered
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise McSolrRunException(
                    "Configuration item '%s' exists but is not a symlink." %
                    config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." %
               (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory,
                                        solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item is listed once; a repeated entry would only unlink and
    # recreate the very same symlink a second time
    library_items_to_symlink = [
        "lib",
        "solr-webapp",
        "start.jar",
        "solr",
    ]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise McSolrRunException(
                "Expected library item '%s' does not exist" %
                library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise McSolrRunException(
                    "Library item '%s' exists but is not a symlink." %
                    library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." %
               (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources",
                                         "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McSolrRunException("log4j.properties at '%s' was not found." %
                                 log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise McSolrRunException("start.jar at '%s' was not found." %
                                 start_jar_path)

    solr_webapp_path = os.path.abspath(
        os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise McSolrRunException("Solr webapp dir at '%s' was not found." %
                                 solr_webapp_path)

    if not hostname_resolves(hostname):
        raise McSolrRunException("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise McSolrRunException("Port %d is already open on this machine." %
                                 port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]
    args += jvm_opts
    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" +
        os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,

        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,

        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += [
        "-jar",
        start_jar_path,
        "--module=http",
    ]

    l.debug("Running command: %s" % ' '.join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process
                  )  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    # NOTE(review): the result of the wait is not checked here, so the
    # "running" message below gets logged even if the port never opened
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Stay in the foreground for as long as the child process is supposed to run
    while True:
        time.sleep(1)
Пример #8
0
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts
    zkServer.sh in the foreground as a child process, uploads the initial Solr
    collection configuration once the port is open and then blocks forever;
    the child gets cleaned up through the registered SIGTERM / atexit handler.

    :param dist_directory: distribution directory ZooKeeper is installed under.
    :param listen: address written to zoo.cfg as "clientPortAddress".
    :param port: port written to zoo.cfg as "clientPort"; must not be open already.
    :param data_dir: base data directory; must exist. ZooKeeper's own data
        lives in its "mediacloud-cluster-zookeeper" subdirectory.
    :param zookeeper_version: ZooKeeper version to run.
    :param solr_version: Solr version used when uploading collection configuration.
    :raises McZooKeeperRunException: if the port is already open, zkServer.sh or
        log4j.properties is missing, or ZooKeeper does not start listening.
    """
    # Check / install the very same distribution that is going to be run below
    # instead of silently falling back to the default directory and version
    if not __zookeeper_is_installed(dist_directory=dist_directory,
                                    zookeeper_version=zookeeper_version):
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper(dist_directory=dist_directory,
                            zookeeper_version=zookeeper_version)

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir,
                                                   must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException(
            "Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory,
                                      zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." %
                                      zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf",
                                         "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McZooKeeperRunException(
            "log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
            """ % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    # Child process inherits the current environment plus ZooKeeper's settings
    zookeeper_env = os.environ.copy()
    zookeeper_env[
        "ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env[
        "SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(
            log4j_properties_path)

    args = [zkserver_path, "start-foreground"]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process
                  )  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(
        port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException(
            "Unable to connect to ZooKeeper at port %d" % port)

    log.info(
        "Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Stay in the foreground for as long as the child process is supposed to run
    while True:
        time.sleep(1)
Пример #9
0
def run_zookeeper(dist_directory: str = MC_DIST_DIR,
                  listen: str = MC_ZOOKEEPER_LISTEN,
                  port: int = MC_ZOOKEEPER_PORT,
                  data_dir: str = MC_SOLR_BASE_DATA_DIR,
                  zookeeper_version: str = MC_ZOOKEEPER_VERSION,
                  solr_version: str = MC_SOLR_VERSION) -> None:
    """Run ZooKeeper, install if needed too.

    Writes a fresh zoo.cfg into the ZooKeeper data directory, starts
    zkServer.sh in the foreground as a child process, uploads the initial Solr
    collection configuration once the port is open and then blocks forever;
    the child gets cleaned up through the registered SIGTERM / atexit handler.

    :param dist_directory: distribution directory ZooKeeper is installed under.
    :param listen: address written to zoo.cfg as "clientPortAddress".
    :param port: port written to zoo.cfg as "clientPort"; must not be open already.
    :param data_dir: base data directory; must exist. ZooKeeper's own data
        lives in its "mediacloud-cluster-zookeeper" subdirectory.
    :param zookeeper_version: ZooKeeper version to run.
    :param solr_version: Solr version used when uploading collection configuration.
    :raises McZooKeeperRunException: if the port is already open, zkServer.sh or
        log4j.properties is missing, or ZooKeeper does not start listening.
    """
    # Check / install the very same distribution that is going to be run below
    # instead of silently falling back to the default directory and version
    if not __zookeeper_is_installed(dist_directory=dist_directory, zookeeper_version=zookeeper_version):
        log.info("ZooKeeper is not installed, installing...")
        __install_zookeeper(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    data_dir = resolve_absolute_path_under_mc_root(path=data_dir, must_exist=True)

    zookeeper_data_dir = os.path.join(data_dir, "mediacloud-cluster-zookeeper")
    if not os.path.isdir(zookeeper_data_dir):
        log.info("Creating data directory at %s..." % zookeeper_data_dir)
        mkdir_p(zookeeper_data_dir)

    if tcp_port_is_open(port=port):
        raise McZooKeeperRunException("Port %d is already open on this machine." % port)

    zookeeper_path = __zookeeper_path(dist_directory=dist_directory, zookeeper_version=zookeeper_version)

    zkserver_path = os.path.join(zookeeper_path, "bin", "zkServer.sh")
    if not os.path.isfile(zkserver_path):
        raise McZooKeeperRunException("zkServer.sh at '%s' was not found." % zkserver_path)

    log4j_properties_path = os.path.join(zookeeper_path, "conf", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise McZooKeeperRunException("log4j.properties at '%s' was not found." % log4j_properties_path)

    zoo_cnf_path = os.path.join(zookeeper_data_dir, "zoo.cfg")
    log.info("Creating zoo.cfg in '%s'..." % zoo_cnf_path)

    with open(zoo_cnf_path, 'w') as zoo_cnf:
        zoo_cnf.write("""
#
# This file is autogenerated. Please do not modify it!
#

clientPortAddress=%(listen)s
clientPort=%(port)d
dataDir=%(data_dir)s

# Must be between zkClientTimeout / 2 and zkClientTimeout / 20
tickTime=30000

initLimit=10
syncLimit=10
            """ % {
            "listen": listen,
            "port": port,
            "data_dir": zookeeper_data_dir,
        })

    # Child process inherits the current environment plus ZooKeeper's settings
    zookeeper_env = os.environ.copy()
    zookeeper_env["ZOOCFGDIR"] = zookeeper_data_dir  # Serves as configuration dir too
    zookeeper_env["ZOOCFG"] = "zoo.cfg"
    zookeeper_env["ZOO_LOG_DIR"] = zookeeper_data_dir
    zookeeper_env["SERVER_JVMFLAGS"] = "-Dlog4j.configuration=file://" + os.path.abspath(log4j_properties_path)

    args = [
        zkserver_path,
        "start-foreground"
    ]

    log.info("Starting ZooKeeper on %s:%d..." % (listen, port))
    log.debug("Running command: %s" % str(args))
    log.debug("Environment variables: %s" % str(zookeeper_env))

    process = subprocess.Popen(args, env=zookeeper_env)
    global __zookeeper_pid
    __zookeeper_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_zookeeper_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_zookeeper_process)

    log.info("ZooKeeper PID: %d" % __zookeeper_pid)

    log.info("Waiting for ZooKeeper to start at port %d..." % port)
    zookeeper_started = wait_for_tcp_port_to_open(port=port, retries=MC_ZOOKEEPER_CONNECT_RETRIES)
    if not zookeeper_started:
        raise McZooKeeperRunException("Unable to connect to ZooKeeper at port %d" % port)

    log.info("Uploading initial Solr collection configurations to ZooKeeper...")
    update_zookeeper_solr_configuration(zookeeper_host="localhost",
                                        zookeeper_port=port,
                                        dist_directory=dist_directory,
                                        solr_version=solr_version)

    log.info("ZooKeeper is ready on port %d!" % port)

    # Stay in the foreground for as long as the child process is supposed to run
    while True:
        time.sleep(1)
Пример #10
0
def __run_solr(
    port: int,
    instance_data_dir: str,
    hostname: str = fqdn(),
    jvm_heap_size: str = None,
    start_jar_args: List[str] = None,
    jvm_opts: List[str] = None,
    connect_timeout: int = 120,
    dist_directory: str = MC_DIST_DIR,
    solr_version: str = MC_SOLR_VERSION,
) -> None:
    """Run Solr instance, installing Solr first if it is not installed yet.

    Prepares the instance data directory (copies collection configuration,
    writes core.properties, symlinks shared configuration and Jetty libraries),
    starts the JVM as a child process and then blocks forever; the child gets
    cleaned up through the registered SIGTERM / atexit handler.

    :param port: TCP port for Jetty to listen on; must not be open already.
    :param instance_data_dir: directory used as Jetty base / home, Solr home
        and Solr data directory; created if missing.
    :param hostname: hostname passed to Solr as "-Dhost"; must resolve.
    :param jvm_heap_size: value for the JVM's "-Xmx" option; omitted if None.
    :param start_jar_args: extra arguments placed right before "-jar start.jar".
    :param jvm_opts: JVM options; defaults to MC_SOLR_STANDALONE_JVM_OPTS.
    :param connect_timeout: passed as "retries" when waiting for the port to open.
    :param dist_directory: distribution directory Solr is installed under.
    :param solr_version: Solr version to run.
    :raises Exception: if a required directory, file or configuration item is
        missing, a symlink destination is a regular file / directory, the
        hostname does not resolve, or the port is already open.
    """
    if jvm_opts is None:
        jvm_opts = MC_SOLR_STANDALONE_JVM_OPTS

    if start_jar_args is None:
        start_jar_args = []

    # Check / install the very same distribution that is going to be run below
    # instead of silently falling back to the default directory and version
    if not __solr_is_installed(dist_directory=dist_directory, solr_version=solr_version):
        l.info("Solr is not installed, installing...")
        __install_solr(dist_directory=dist_directory, solr_version=solr_version)

    solr_home_dir = __solr_home_path(solr_home_dir=MC_SOLR_HOME_DIR)
    if not os.path.isdir(solr_home_dir):
        raise Exception("Solr home directory '%s' does not exist." % solr_home_dir)

    solr_path = __solr_path(dist_directory=dist_directory, solr_version=solr_version)

    if not os.path.isdir(instance_data_dir):
        l.info("Creating data directory at %s..." % instance_data_dir)
        mkdir_p(instance_data_dir)

    l.info("Updating collections at %s..." % instance_data_dir)
    collections = __collections(solr_home_dir=solr_home_dir)
    for collection_name, collection_path in sorted(collections.items()):
        l.info("Updating collection '%s'..." % collection_name)

        collection_conf_src_dir = os.path.join(collection_path, "conf")
        if not os.path.isdir(collection_conf_src_dir):
            raise Exception(
                "Configuration for collection '%s' at %s does not exist" % (collection_name, collection_conf_src_dir)
            )

        collection_dst_dir = os.path.join(instance_data_dir, collection_name)
        mkdir_p(collection_dst_dir)

        # Remove and copy configuration in case it has changed
        # (don't symlink because Solr 5.5+ doesn't like those)
        collection_conf_dst_dir = os.path.join(collection_dst_dir, "conf")
        if os.path.lexists(collection_conf_dst_dir):
            l.debug("Removing old collection configuration in '%s'..." % collection_conf_dst_dir)
            if os.path.islink(collection_conf_dst_dir):
                # Might still be a link from older Solr versions
                os.unlink(collection_conf_dst_dir)
            else:
                shutil.rmtree(collection_conf_dst_dir)

        l.info("Copying '%s' to '%s'..." % (collection_conf_src_dir, collection_conf_dst_dir))
        shutil.copytree(collection_conf_src_dir, collection_conf_dst_dir, symlinks=False)

        l.info("Updating core.properties for collection '%s'..." % collection_name)
        core_properties_path = os.path.join(collection_dst_dir, "core.properties")
        with open(core_properties_path, "w") as core_properties_file:
            core_properties_file.write(
                """
#
# This file is autogenerated. Don't bother editing it!
#

name=%(collection_name)s
instanceDir=%(instance_dir)s
"""
                % {"collection_name": collection_name, "instance_dir": collection_dst_dir}
            )

    l.info("Symlinking shard configuration...")
    config_items_to_symlink = ["contexts", "etc", "modules", "resources", "solr.xml"]
    for config_item in config_items_to_symlink:
        config_item_src_path = os.path.join(solr_home_dir, config_item)
        if not os.path.exists(config_item_src_path):
            raise Exception("Expected configuration item '%s' does not exist" % config_item_src_path)

        # Recreate symlink just in case; refuse to delete anything that is not
        # a symlink so that real files / directories never get clobbered
        config_item_dst_path = os.path.join(instance_data_dir, config_item)
        if os.path.lexists(config_item_dst_path):
            if not os.path.islink(config_item_dst_path):
                raise Exception("Configuration item '%s' exists but is not a symlink." % config_item_dst_path)
            os.unlink(config_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (config_item_src_path, config_item_dst_path))
        relative_symlink(config_item_src_path, config_item_dst_path)

    jetty_home_path = __jetty_home_path(dist_directory=dist_directory, solr_version=solr_version)

    l.info("Symlinking libraries and JARs...")
    # Each item is listed once; a repeated entry would only unlink and
    # recreate the very same symlink a second time
    library_items_to_symlink = ["lib", "solr-webapp", "start.jar", "solr"]
    for library_item in library_items_to_symlink:
        library_item_src_path = os.path.join(jetty_home_path, library_item)
        if not os.path.exists(library_item_src_path):
            raise Exception("Expected library item '%s' does not exist" % library_item_src_path)

        # Recreate symlink just in case
        library_item_dst_path = os.path.join(instance_data_dir, library_item)
        if os.path.lexists(library_item_dst_path):
            if not os.path.islink(library_item_dst_path):
                raise Exception("Library item '%s' exists but is not a symlink." % library_item_dst_path)
            os.unlink(library_item_dst_path)

        l.info("Symlinking '%s' to '%s'..." % (library_item_src_path, library_item_dst_path))
        relative_symlink(library_item_src_path, library_item_dst_path)

    log4j_properties_path = os.path.join(solr_home_dir, "resources", "log4j.properties")
    if not os.path.isfile(log4j_properties_path):
        raise Exception("log4j.properties at '%s' was not found." % log4j_properties_path)

    start_jar_path = os.path.join(jetty_home_path, "start.jar")
    if not os.path.isfile(start_jar_path):
        raise Exception("start.jar at '%s' was not found." % start_jar_path)

    solr_webapp_path = os.path.abspath(os.path.join(jetty_home_path, "solr-webapp"))
    if not os.path.isdir(solr_webapp_path):
        raise Exception("Solr webapp dir at '%s' was not found." % solr_webapp_path)

    if not hostname_resolves(hostname):
        raise Exception("Hostname '%s' does not resolve." % hostname)

    if tcp_port_is_open(port=port):
        raise Exception("Port %d is already open on this machine." % port)

    __raise_if_old_shards_exist()

    args = ["java"]
    l.info("Starting Solr instance on %s, port %d..." % (hostname, port))

    if jvm_heap_size is not None:
        args += ["-Xmx%s" % jvm_heap_size]
    args += jvm_opts
    # noinspection SpellCheckingInspection
    args += [
        "-server",
        "-Djava.util.logging.config.file=file://" + os.path.abspath(log4j_properties_path),
        "-Djetty.base=%s" % instance_data_dir,
        "-Djetty.home=%s" % instance_data_dir,
        "-Djetty.port=%d" % port,
        "-Dsolr.solr.home=%s" % instance_data_dir,
        "-Dsolr.data.dir=%s" % instance_data_dir,
        "-Dhost=%s" % hostname,
        "-Dmediacloud.luceneMatchVersion=%s" % MC_SOLR_LUCENEMATCHVERSION,
        # write heap dump to data directory on OOM errors
        "-XX:+HeapDumpOnOutOfMemoryError",
        "-XX:HeapDumpPath=%s" % instance_data_dir,
        # needed for resolving paths to JARs in solrconfig.xml
        "-Dmediacloud.solr_dist_dir=%s" % solr_path,
        "-Dmediacloud.solr_webapp_dir=%s" % solr_webapp_path,
    ]
    args += start_jar_args
    args += ["-jar", start_jar_path, "--module=http"]

    l.debug("Running command: %s" % " ".join(args))

    process = subprocess.Popen(args)
    global __solr_pid
    __solr_pid = process.pid

    # Declare that we don't care about the exit code of the child process so
    # it doesn't become a zombie when it gets killed in signal handler
    signal.signal(signal.SIGCHLD, signal.SIG_IGN)

    signal.signal(signal.SIGTERM, __kill_solr_process)  # SIGTERM is handled differently for whatever reason
    atexit.register(__kill_solr_process)

    l.info("Solr PID: %d" % __solr_pid)

    l.info("Solr is starting on port %d, will be available shortly..." % port)
    # NOTE(review): the result of the wait is not checked here, so the
    # "running" message below gets logged even if the port never opened
    wait_for_tcp_port_to_open(port=port, retries=connect_timeout)

    l.info("Solr is running on port %d!" % port)

    # Stay in the foreground for as long as the child process is supposed to run
    while True:
        time.sleep(1)