Пример #1
0
    def _notify_master_error(self, worker_ip):
        """
        Tell the master that a worker failed and reset the client state.

        Opens a connection to the master (configured "ms" host, port 4017)
        when none is held, sets the client state to "new", and sends a
        ``client ... worker_state=error`` packet naming the failed worker.
        The notification is best-effort: connection and send failures are
        logged and the connection is dropped so a later call reconnects.

        :param worker_ip: address of the worker that failed, as a string.
        :return: None
        """
        if self.__master_connection is None:
            connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

            # master should reply in 30 seconds or so...
            connection.settimeout(30)
            master_address = (self.__config.get_conf_val("ms"), 4017)

            try:
                connection.connect(master_address)
            except Exception as e:
                utils.print_err("Error: Master Connection failed in _notify_master_error() ")
                utils.print_err("Error: " + str(e))
                # Never keep an unconnected socket: a non-None connection is
                # treated as usable below, and sendall() on it would raise.
                connection.close()
            else:
                self.__master_connection = connection

        with self.__data_lock:
            self.__state = "new"

        # Work on a local reference so the None check and the send agree even
        # if the attribute is reassigned in between.
        connection = self.__master_connection
        if connection is None:
            utils.print_err("Error: Unable to notify master.")
            utils.print_err("Error: Stale connection likely present on master.")
            return

        cmd_send = "client client_uuid=" + self.__my_uuid + " image=" + self.__config.get_conf_val("image")
        if self.__req_uuid != "":
            cmd_send = cmd_send + " uuid=" + self.__req_uuid
        with self.__data_lock:
            cmd_send = cmd_send + " state=" + self.__state + " worker_ip="
        cmd_send = cmd_send + worker_ip + " worker_state=error"

        try:
            connection.sendall(cmd_send)
            # The reply content is not inspected; reading it just drains the
            # master's answer.
            connection.recv(1024)
        except socket.error as e:
            # socket.timeout is a subclass of socket.error, so a silent master
            # lands here too.  Drop the connection so the next user reconnects.
            utils.print_err("Error: Unable to notify master.")
            utils.print_err("Error: " + str(e))
            connection.close()
            self.__master_connection = None
Пример #2
0
    def run(self):
        """
        Run the mprov client.

        Connects to the master (configured "ms" host, port 4017), registers
        with it, then waits for the sync and control threads to finish.

        :return: 0 when the sync result is "pass"; 1 otherwise, including
                 when the master cannot be reached.
        """

        self.__master_connection = socket.socket(socket.AF_INET,
                                                 socket.SOCK_STREAM)
        # master should reply in 30 seconds or so...
        self.__master_connection.settimeout(30)
        master_address = (self.__config.get_conf_val("ms"), 4017)

        try:
            self.__master_connection.connect(master_address)
        except Exception as e:
            utils.print_err("Error: Master Connection failed in run(). ")
            # str(e) rather than e.message: socket errors built from
            # (errno, strerror) carry an empty .message on Python 2, and the
            # attribute does not exist at all on Python 3.
            utils.print_err("Error: " + str(e))
            # Release the descriptor; we are giving up on this run.
            self.__master_connection.close()
            return 1

        # start the process with a call to register.
        # this sets up the timer functions and threads.
        self._register_with_master()

        # wait for the sync to start.
        while not self.__syncing:
            sleep(5)

        # wait on the sync thread.
        if self.__sync_thread is not None:
            self.__sync_thread.join()

        # wait on the control thread.
        if self.__ctrl_thread is not None:
            self.__ctrl_thread.join()

        # exit now and return the status.
        return 0 if self.__sync_result == "pass" else 1
Пример #3
0
    def run(self):
        """
        Run the mprov client.

        Connects to the master, starts the registration thread, and then
        blocks on the shared condition until a sync thread exists and has
        finished.  Afterwards the heartbeat timer is cancelled and the
        control thread is joined.

        :return: 0 when the sync result is "pass", otherwise 1.
        """

        self.__master_connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # master should reply in 30 seconds or so...
        self.__master_connection.settimeout(30)
        master_address = (self.__config.get_conf_val("ms"), 4017)

        utils.print_err("Attempting to connect to master: " + master_address[0])
        try:
            self.__master_connection.connect(master_address)
        except Exception as e:
            # Not fatal here: the registration thread is started regardless.
            utils.print_err("Error: Master Connection failed in run(). ")
            utils.print_err("Error: " + str(e))

        # The condition's lock is held from here on and only released inside
        # the wait() calls below, which is what lets the register thread run.
        with self.__thread_condition:
            # start the process with a call to register.
            # this sets up the timer functions and threads.
            sys.stdout.flush()
            # Thread.start() returns None, so chaining it onto the constructor
            # stored None in __hb_timer.  Keep that value explicit here.
            # NOTE(review): presumably _restart_reg_timer() installs the real
            # timer object later - confirm.
            self.__hb_timer = None
            reg_thread = threading.Thread(name="reg_thread", target=self._register_with_master)
            reg_thread.start()

            # wait for the sync to start.
            sys.stdout.flush()
            while self.__sync_thread is None:
                sys.stdout.write(".")
                sys.stdout.flush()
                self.__thread_condition.wait()

            sys.stdout.flush()

            # wait for the sync thread to finish
            while self.__sync_thread.is_alive():
                sys.stdout.write("^")
                sys.stdout.flush()
                self.__sync_thread.join(1)
                # Deliberately untimed: each pass waits for another notify on
                # the condition before re-checking the sync thread.
                self.__thread_condition.wait()

        # if we get here, tell the master we are done and cancel the timer.
        # Guarded because no timer may ever have been installed.
        if self.__hb_timer is not None:
            self.__hb_timer.cancel()
        print("Sync complete.")

        # wait on the control thread.
        if self.__ctrl_thread is not None:
            self.__ctrl_thread.join()

        # exit now and return the status.
        return 0 if self.__sync_result == "pass" else 1
Пример #4
0
    def _handle_sync(self, worker_ip):
        """
        Run one sync attempt against the given worker.

        Steps: handshake with the worker on port 4018, start the control
        thread, write a temporary rsyncd config and secrets file, run a
        foreground rsync daemon, tell the worker which port/module to use,
        wait for the daemon to terminate, then read the worker's result
        packet and record it in the sync result.

        :param worker_ip: address of the worker to sync from.
        :return: False when the handshake with the worker fails (the state
                 is reset to "new" and the retry counter is bumped); None
                 once a sync attempt has run through to the result packet.
        """

        if self.__retries > 5:
            utils.print_err("Error: Maximum retries (5) reached.  Exiting")
            sys.exit(-1)

        # connect to the worker and wait for it to reply that it's ready.
        # create a new socket for the worker connection.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        worker_address = (worker_ip, 4018)

        # give the worker 1 minute to reply, he has to check with the master.
        sock.settimeout(60)

        try:
            sock.connect(worker_address)
            sock.sendall("sync client uuid=" + self.__req_uuid + " client_uuid=" + self.__my_uuid)
            packet = utils.parse_packet(sock.recv(1024))
            if packet is None or "ok" not in packet:
                if packet is None:
                    utils.print_err("Error: Worker " + worker_ip + " replied badly.  Will retry.")
                    utils.print_err("Error Packet is empty")
                else:
                    utils.print_err("Error: Worker replied badly.  Will retry.")
                    utils.print_err("Error Packet: " + packet["raw_packet"])
                sock.close()
                self.__syncing = False
                self.__retries += 1
                with self.__data_lock:
                    self.__state = "new"
                return False
        except Exception as e:
            sock.close()
            utils.print_err("Error: Exception in worker comms: " + str(e))
            self.__syncing = False
            self.__retries += 1
            utils.print_err(("Error: Sending master worker IP: " + worker_ip))
            self._notify_master_error(worker_ip)
            with self.__data_lock:
                self.__state = "new"
            return False

        # worker is connected
        # Establish a control connection in a separate thread
        self.__ctrl_thread = threading.Thread(target=self._worker_control, args=(worker_address,))
        self.__ctrl_thread.start()

        # generate the module_name
        module_name = str(uuid.uuid4())
        sync_port = "8971"  # TODO: make this dynamic, and random.

        # output the rsyncd config and a secret file
        rsyncd_fd, rsyncd_path = mkstemp()
        secrets_fd, secrets_path = mkstemp()

        try:
            # Write through the descriptors mkstemp() already opened; closing
            # the file object closes the descriptor as well.
            with os.fdopen(rsyncd_fd, "w") as rsyncd_file:
                rsyncd_file.write("[" + module_name + "]\n" +
                                  "\tpath = " + self.__path + "\n" +
                                  "\tread only = no\n" +
                                  "\tauth users = root\n" +
                                  "\tsecrets file = " + secrets_path + "\n" +
                                  "\tuid = 0\n" +
                                  "\tgid = 0\n")

            # output the rsyncd secrets file.
            with os.fdopen(secrets_fd, "w") as secrets_file:
                secrets_file.write("root:" + self.__my_uuid)

            print("Starting sync from worker: " + worker_address[0])
            # setup the rsyncd command.
            rsyncd_proc = subprocess.Popen(["/usr/bin/rsync",
                                            "--daemon",
                                            "--port=" + sync_port,
                                            "--no-detach",
                                            "-v",
                                            "-4",
                                            "--config=" + rsyncd_path], shell=False)
            self.__rsyncd_pid = rsyncd_proc.pid

            # tell the worker we are good to go.
            sock.sendall("ok client_uuid=" + self.__my_uuid + " port=" + sync_port + " module=" + module_name)

            # wait for the rsyncd to terminate.
            rsyncd_proc.communicate()
        finally:
            # Remove the temp files even when launching rsync or talking to
            # the worker raised, so they are not left behind.
            os.remove(secrets_path)
            os.remove(rsyncd_path)

        # wait for the return packet to tell us if the sync was successful.
        err_code = None
        try:
            res_packet = utils.parse_packet(sock.recv(1024))
            if res_packet is None:
                err_code = 104
            elif "ok" not in res_packet:
                err_code = 103
            elif "result" not in res_packet:
                err_code = 102
            elif res_packet['result'] == "error":
                err_code = 101
            elif res_packet['result'] == "pass":
                print("Worker Sync Complete")
                self.__sync_result = 'pass'
            # Any other 'result' value leaves the sync result untouched.
        except socket.error as e:
            # socket.timeout is a subclass of socket.error.  Catching the base
            # class means a reset connection is recorded as a failed sync
            # instead of escaping and skipping the state update below.
            if isinstance(e, socket.timeout):
                utils.print_err("Error: worker control comms. timeout.")
            else:
                utils.print_err("Error: worker control comms. failed: " + str(e))
            err_code = 105

        if err_code is not None:
            print("Worker Sync Failed! (Err: %d)" % err_code)
            self.__sync_result = 'error'
            self._cancel_rsync()

        sock.close()

        # set the state to done and let the register function handle letting the master know.
        with self.__data_lock:
            self.__state = "done"

        self.__syncing = False
        self.__exiting = True
        print("Exit Worker Thread")
Пример #5
0
    def _worker_control(self, worker_address):
        """
        Maintain a control connection to the worker while a sync is running.

        Opens a second connection to the worker, performs a
        ``control client`` handshake and then exchanges "ok" packets about
        once a second for as long as the syncing flag is set.  Any failure
        closes the socket, cancels the rsync and clears the syncing flag.

        :param worker_address: (host, port) tuple of the worker.
        :return: False when the handshake or a keep-alive exchange fails;
                 None when the loop ends because syncing stopped.
        """
        # create a new socket for the worker connection.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        # give the worker 1 minute to reply, he has to check with the master.
        sock.settimeout(60)

        control_connection_good = False
        try:
            # connect() is inside the try so that a refused or timed-out
            # connection is cleaned up like every other handshake failure.
            sock.connect(worker_address)
            sock.sendall("control client uuid=" + self.__req_uuid)
            packet = utils.parse_packet(sock.recv(1024))
            if packet is not None and "ok" in packet:
                control_connection_good = True
            else:
                utils.print_err("Error: Worker replied badly. Cannot continue.  Shutting down rsync ")
        except Exception as e:
            utils.print_err("Error: Exception in worker comms: " + str(e))

        if not control_connection_good:
            sock.close()
            self.__syncing = False
            with self.__data_lock:
                self.__state = "new"
            self._cancel_rsync()
            return False

        while self.__syncing:
            # if we get here we have a good connection, so let's make sure it doesn't go anywhere
            try:
                sock.sendall("ok")
                packet = utils.parse_packet(sock.recv(15))
                still_ok = packet is not None and "ok" in packet
            except Exception as e:
                print(e)
                still_ok = False

            if not still_ok:
                sock.close()
                self._cancel_rsync()
                self.__syncing = False
                return False

            # small sleep to loosen the tight loop
            sleep(1)

        # we are no longer syncing, close everything down.
        sock.close()

        self._cancel_rsync()
        self.__syncing = False
        print("** CTRL THREAD END")
        return
Пример #6
0
    def _register_with_master(self):
        """
        Send one registration/heartbeat packet to the master and act on it.

        Runs entirely under the thread condition and notifies it before
        returning on every path.  The method (re)connects to the master if
        needed, sends the client's uuid, image, request uuid (if any) and
        current state, and then:

        * in state "done" or "syncing" only reports the state (the timer is
          restarted only while syncing);
        * on a reply without "ok", drops the connection and retries later;
        * on an "ok" reply, records the request uuid and, when not already
          syncing and the state is "new", starts the sync thread for the
          worker the master assigned.

        :return: None
        """
        with self.__thread_condition:

            if self.__master_connection is None:
                connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

                # master should reply in 30 seconds or so...
                connection.settimeout(30)
                master_address = (self.__config.get_conf_val("ms"), 4017)
                utils.print_err("Attempting to connect to master: " + master_address[0])
                try:
                    connection.connect(master_address)
                except Exception as e:
                    utils.print_err("Error: Master Connection failed in _register_with_master(). ")
                    utils.print_err("Error: " + str(e))
                    # Do not keep the unconnected socket: otherwise the next
                    # heartbeat skips reconnecting and is wasted on a failing
                    # sendall().
                    connection.close()
                    self._restart_reg_timer()
                    self.__thread_condition.notify()
                    return
                self.__master_connection = connection

            try:
                cmd_send = "client client_uuid=" + self.__my_uuid + " image=" + self.__config.get_conf_val("image")
                with self.__data_lock:
                    if self.__req_uuid != "":
                        cmd_send = cmd_send + " uuid=" + self.__req_uuid
                    cmd_send = cmd_send + " state=" + self.__state
                self.__master_connection.sendall(cmd_send)

                packet = utils.parse_packet(self.__master_connection.recv(1024))  # type: dict
                if packet is None:
                    utils.print_err("Error: Unable to parse packet.  Will retry")
                    self.__master_connection.close()
                    self.__master_connection = None
                    # Retry on the next heartbeat instead of indexing into the
                    # missing packet and relying on the blanket handler below.
                    self._restart_reg_timer()
                    self.__thread_condition.notify()
                    return

                print(packet["raw_packet"])
                with self.__data_lock:
                    if self.__state in ("done", "syncing"):
                        if self.__state == "syncing":
                            self._restart_reg_timer()
                        self.__thread_condition.notify()
                        print("Sent state: " + self.__state)
                        return

                if "ok" not in packet:
                    if "err" in packet:
                        utils.print_err("No worker found, waiting for more workers...")
                    else:
                        utils.print_err("Error: Master Server responded poorly to our register request. Will retry.")
                        utils.print_err("Error: Master Server Said: '" + packet["raw_packet"] + "'")
                    self.__master_connection.close()
                    self.__master_connection = None
                else:
                    self.__req_uuid = packet["uuid"]

                    worker_ip = packet["worker_ip"]
                    if worker_ip is None:
                        # we don't have a worker.  So exit.
                        self._restart_reg_timer()
                        self.__thread_condition.notify()
                        return

                    # thread off to connect to the worker and start the sync
                    # set our syncing flag, so we know not to spawn another thread.
                    if not self.__syncing and self.__state == "new":
                        with self.__data_lock:
                            self.__syncing = True
                            self.__state = "syncing"
                        # now let's start the thread to talk to the worker.
                        self.__sync_thread = threading.Thread(target=self._handle_sync, args=(worker_ip,))
                        self.__sync_thread.start()

                # set this function up as a re-occurring timer based on the -b/--heartbeat option.
                self._restart_reg_timer()
            except Exception as e:
                if self.__master_connection is not None:
                    self.__master_connection.close()
                self.__master_connection = None
                utils.print_err("Error: Problem communicating with master server. Will Retry")
                # Report the cause too; this handler also catches malformed
                # replies (e.g. missing keys), not only socket failures.
                utils.print_err("Error: " + str(e))
                self._restart_reg_timer()
            self.__thread_condition.notify()
Пример #7
0
    def _handle_sync(self, worker_ip):
        """
        Run one sync attempt against the given worker and report to the master.

        Steps: handshake with the worker on port 4018, start the control
        thread, write a temporary rsyncd config and secrets file, run a
        foreground rsync daemon, tell the worker which port/module to use,
        wait for the daemon to terminate, read the worker's result packet,
        and finally send a ``state=done`` packet to the master.

        :param worker_ip: address of the worker to sync from.
        :return: False when the handshake with the worker fails; None once a
                 sync attempt has run through to the master notification.
        """
        # connect to the worker and wait for it to reply that it's ready.
        # create a new socket for the worker connection.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        worker_address = (worker_ip, 4018)

        # give the worker 1 minute to reply, he has to check with the master.
        sock.settimeout(60)

        try:
            sock.connect(worker_address)
            sock.sendall("sync client uuid=" + self.__req_uuid +
                         " client_uuid=" + self.__my_uuid)
            packet = utils.parse_packet(sock.recv(1024))
            # An unparseable reply is treated as a bad reply rather than
            # surfacing as a TypeError from the membership test.
            if packet is None or "ok" not in packet:
                utils.print_err("Error: Worker replied badly.  Will retry.")
                sock.close()
                self.__syncing = False
                self.__retries += 1
                return False
        except Exception as e:
            sock.close()
            # str(e) rather than e.message, which is empty for socket errors.
            utils.print_err("Error: Exception in worker comms: " + str(e))
            self.__syncing = False
            self.__retries += 1
            utils.print_err(("Error: Sending master worker IP: " + worker_ip))
            self._notify_master_error(worker_ip)
            return False

        # worker is connected
        # Establish a control connection in a separate thread
        self.__ctrl_thread = threading.Thread(target=self._worker_control,
                                              args=(worker_address, ))
        self.__ctrl_thread.start()

        # generate the module_name
        module_name = str(uuid.uuid4())
        sync_port = "8971"  # TODO: make this dynamic, and random.

        # output the rsyncd config and a secret file
        rsyncd_fd, rsyncd_path = mkstemp()
        secrets_fd, secrets_path = mkstemp()

        try:
            # Write through the descriptors mkstemp() already opened; closing
            # the file object closes the descriptor as well.
            with os.fdopen(rsyncd_fd, "w") as rsyncd_file:
                rsyncd_file.write("[" + module_name + "]\n" +
                                  "\tpath = " + self.__path + "\n" +
                                  "\tread only = no\n" +
                                  "\tauth users = root\n" +
                                  "\tsecrets file = " + secrets_path + "\n" +
                                  "\tuid = 0\n" +
                                  "\tgid = 0\n")

            # output the rsyncd secrets file.
            with os.fdopen(secrets_fd, "w") as secrets_file:
                secrets_file.write("root:" + self.__my_uuid)

            print("Starting sync from worker.")
            # setup the rsyncd command.
            rsyncd_proc = subprocess.Popen([
                "/usr/bin/rsync", "--daemon", "--port=" + sync_port,
                "--no-detach", "-v", "-4", "--config=" + rsyncd_path
            ], shell=False)
            self.__rsyncd_pid = rsyncd_proc.pid

            # tell the worker we are good to go.  sendall() because send()
            # may transmit only part of the packet.
            sock.sendall("ok client_uuid=" + self.__my_uuid + " port=" + sync_port +
                         " module=" + module_name + "\n")

            # wait for the rsyncd to terminate.
            rsyncd_proc.communicate()
        finally:
            # Remove the temp files even when launching rsync or talking to
            # the worker raised, so they are not left behind.
            os.remove(secrets_path)
            os.remove(rsyncd_path)

        # wait for the return packet to tell us if the sync was successful.
        err_code = None
        try:
            res_packet = utils.parse_packet(sock.recv(1024))
            if res_packet is None:
                err_code = 104
            elif "ok" not in res_packet:
                err_code = 103
            elif "result" not in res_packet:
                err_code = 102
            elif res_packet['result'] == "error":
                err_code = 101
            elif res_packet['result'] == "pass":
                print("Worker Sync Complete")
                self.__sync_result = 'pass'
            # Any other 'result' value leaves the sync result untouched.
        except socket.error as e:
            # socket.timeout is a subclass of socket.error.  Catching the base
            # class means a reset connection is recorded as a failed sync
            # instead of escaping before the master is told we are done.
            if isinstance(e, socket.timeout):
                utils.print_err("Error: worker control comms. timeout.")
            else:
                utils.print_err("Error: worker control comms. failed: " + str(e))
            err_code = 105

        if err_code is not None:
            print("Worker Sync Failed! (Err: %d)" % err_code)
            self.__sync_result = 'error'

        sock.close()

        if self.__master_connection is None:
            connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

            # master should reply in 30 seconds or so...
            connection.settimeout(30)
            master_address = (self.__config.get_conf_val("ms"), 4017)

            try:
                connection.connect(master_address)
            except Exception as e:
                utils.print_err(
                    "Error: Master Connection failed in _handle_sync() ")
                utils.print_err("Error: " + str(e))
                # Never keep an unconnected socket: a non-None connection is
                # treated as usable below.
                connection.close()
            else:
                self.__master_connection = connection

        if self.__master_connection is not None:
            cmd_send = "client client_uuid=" + self.__my_uuid + \
                       " image=" + self.__config.get_conf_val("image")
            if self.__req_uuid != "":
                cmd_send = cmd_send + " uuid=" + self.__req_uuid
            cmd_send = cmd_send + " state=done"
            cmd_send += "\n"
            utils.print_err("Sending to master: " + cmd_send)
            try:
                self.__master_connection.sendall(cmd_send)
                self.__master_connection.recv(1024)
            except socket.error as e:
                # Best effort: still fall through so the flags below are set.
                utils.print_err("Error: Unable to notify master.")
                utils.print_err("Error: " + str(e))
                self.__master_connection.close()
                self.__master_connection = None
        else:
            utils.print_err("Error: Unable to notify master.")
            utils.print_err(
                "Error: Stale connection likely present on master.")

        self.__syncing = False
        self.__exiting = True
Пример #8
0
and be directed to a 'worker' who will actually perform the rsync.
"""
import signal

import mprov.Config
from mprov.utils import print_err
if __name__ == "__main__":
    # sys.exit() is used instead of the interactive exit() helper, which is
    # injected by the site module and is not guaranteed to exist.
    import sys

    # Create a new Config object.
    config = mprov.Config.Config()

    # Figure out what program to launch based off the command line args.
    # Each server/client module is imported only when it is selected.
    if config.get_conf_val("master"):
        import mprov.MasterServer
        program = mprov.MasterServer.MasterServer(config)
    elif config.get_conf_val("worker"):
        import mprov.WorkerServer
        program = mprov.WorkerServer.WorkerServer(config)
    elif config.get_conf_val("client"):
        import mprov.Client
        program = mprov.Client.Client(config)
    else:
        # XXX: Should never get here, but in case we do...
        print_err("Error: Not sure what to run!? How'd you do that?!?!\n\n")
        sys.exit(1)

    # Hand off to the program the user wants; Ctrl-C is routed to its handler.
    signal.signal(signal.SIGINT, program.signal_handler)
    return_code = program.run()
    sys.exit(return_code)