示例#1
0
 def delete(self, path):
     """Ask the master server to delete *path* and log the outcome."""
     master = rpc_call(self.master_addr)
     err = master.delete(path)
     if not err:
         log.info("File Deleted Successfully")
         return
     log.error("Error while deleting %s : %s", path, err)
示例#2
0
 def create(self, path):
     """Request creation of a new file at *path* from the master server."""
     resp, err = rpc_call(self.master_addr).create(path)
     if not resp:
         log.error("Error creating file '%s'. Why? : %s", path, err)
     else:
         log.debug("Create API response %s", resp)
示例#3
0
    def write_helper(self, path, chunk_index, start, end, data):
        """Write *data* into the chunk at (*path*, *chunk_index*).

        Stages the data on every replica, then asks the primary replica
        to commit the write.  Returns True on success, False otherwise.
        (``end`` is accepted for interface compatibility but unused here.)
        """
        chunk_handle, chunk_locations, err = self.get_chunk_guaranteed(
            path, chunk_index)
        if err:
            return False

        # Tag this write with the client id and the current timestamp.
        data_id = DataId(self.client_id, time.time())

        # Stage the data in every replica's memory before committing.
        if self.push_data(chunk_locations, data_id, data):
            log.error('Data not pushed to all replicas.')
            return False

        # The primary (lease holder) serializes the actual write.
        primary = self.find_lease_holder(chunk_handle)
        if not primary:
            log.error("Primary chunk server not found.")
            return False

        write_err = rpc_call(primary).write(data_id.client_id,
                                            data_id.timestamp, path,
                                            chunk_index, chunk_handle,
                                            start, chunk_locations)
        return not write_err
示例#4
0
    def getfilelength(self, path):
        """Ask the master server for the total length of *path*.

        Returns the ``(length, error)`` pair exactly as the master
        reports it.
        """
        master = rpc_call(self.master_addr)
        length, err = master.get_file_length(path)
        log.debug("%s length is: %s", path, length)
        return length, err
示例#5
0
    def push_data(self, chunk_locations, data_id, data):
        """Push *data* into the memory of every replica in *chunk_locations*.

        Returns the first error a replica reports, or None when all
        pushes succeed.
        """
        for location in chunk_locations:
            err = rpc_call(location).push_data(data_id.client_id,
                                               data_id.timestamp, data)
            if err:
                return err
        return None
示例#6
0
 def list_allfiles(self, path):
     """List all files under *path* by querying the master server.

     Logs each file name at debug level on success; logs the master's
     error otherwise.
     """
     master_server = rpc_call(self.master_addr)
     resp, err = master_server.list_allfiles(path)
     if resp:
         log.debug("List of files in %s:\n", path)
         for filename in resp:  # renamed from `file`, which shadows a builtin
             log.debug("%s\n", filename)
     else:
         # Bug fix: message was copy-pasted from create() and wrongly
         # claimed a file-creation error.
         log.error("Error listing files in '%s'. Why? : %s", path, err)
示例#7
0
    def __init__(self, master_addr):
        """Create a client bound to *master_addr* and obtain a client id."""
        self.master_addr = master_addr

        # The master hands out a cluster-wide unique client identifier.
        self.client_id = rpc_call(self.master_addr).unique_client_id()

        # TODO: implement cache with timeout. need some kind of expiring dict
        self.location_cache = {}
        self.lease_holder_cache = {}  # TODO: implement cache with timeout
示例#8
0
 def apply_to_secondary(self, client_id, timestamp, path, chunk_index,
                        chunk_handle, offset, chunk_locations):
     """RPC each secondary chunkserver to apply the write.

     Skips this server's own address.  Returns the first error a
     secondary reports, or None when every secondary succeeds.
     """
     secondaries = (addr for addr in chunk_locations
                    if addr != self.my_addr)
     for addr in secondaries:
         err = rpc_call(addr).serialized_write(client_id, timestamp, path,
                                               chunk_index, chunk_handle,
                                               offset, chunk_locations,
                                               False)
         if err:
             return err
     return None
示例#9
0
 def test_connection(self, chunk_server_addr):
     """Probe *chunk_server_addr*; return False only when unreachable.

     As part of the probe, asks the server to delete the chunks queued
     in ``self.chunks_to_delete``.  The call succeeding at all counts
     as a live connection, regardless of the deletion result.
     """
     chunk_server = rpc_call(chunk_server_addr)
     try:
         resp = chunk_server.delete_bad_chunk(self.chunks_to_delete)
     except ConnectionRefusedError:
         log.info("Unable to connect with %s", chunk_server_addr)
         return False
     if resp:
         log.info("%s has deleted all bad chunks", chunk_server_addr)
     else:
         log.info("%s is unable to delete all bad chunk handle", chunk_server_addr)
     return True
示例#10
0
 def read_helper(self, path, chunk_index, start, length):
     """Call Chunkserver RPC to read chunkdata.

     Resolves the chunk's locations, picks one replica at random
     (bounded by the replication factor), and returns a
     ``(data, error)`` pair.
     """
     chunk_handle, chunk_locations, err = self.find_chunk(path, chunk_index)
     if err:
         return None, err
     # randint is 1-based here; subtract 1 for a zero based index.
     upper = min(len(chunk_locations), REPLICATION_FACTOR)
     chunk_loc = chunk_locations[random.randint(1, upper) - 1]
     log.debug("Chunk Handle  %s and chunk Locations %s ", chunk_handle,
               chunk_locations)
     data, err = rpc_call(chunk_loc).read(chunk_handle, start, length)
     # TODO :Handle case if server is down
     return data, err
示例#11
0
    def poll_chunkservers(self):
        """A one time polling function, runs when master is started to get list of chunks from active chunk servers
            and update the chunks_of_chunkserver dict."""
        log.debug("****Polling active chunkservers start***")
        for server in self.active_chunk_servers:
            log.debug("Polling chunkserver %s", server)
            proxy = rpc_call(server)
            try:
                # Record the chunk handles this server currently holds.
                self.chunks_of_chunk_server[server] = proxy.get_chunk_handles()
            except ConnectionRefusedError:
                log.error("Polling failed for chunkserver: %s", server)
            else:
                log.debug("Polling complete for chunkserver: %s", server)

        log.debug("****Polling active chunkservers end***")
示例#12
0
    def append(self, path, data):
        """Append *data* to the end of the file at *path*.

        Returns the offset reported by the primary chunk server on
        success, or an error-description string on failure.
        """
        length = len(data)
        # First check if the size is valid.
        if length > APPEND_SIZE:
            log.error("ERROR: Data size exceeds append limit.")
            return "size limit exceeded"

        # To calculate chunkIndex we must get the length.
        filelength, err = self.getfilelength(path)
        if err:
            # Bug fix: previously the error was only logged and execution
            # fell through, so an invalid filelength (likely None) crashed
            # the integer division below.  Fail fast with an error string,
            # matching this function's other failure paths.
            log.error("Error while fetching file length %s", err)
            return "can't get file length"
        log.debug("File length fetched from server %s", filelength)

        chunk_index = filelength // CHUNK_SIZE

        # Get chunkHandle and chunkLocations
        chunk_handle, chunk_locations, err = self.get_chunk_guaranteed(
            path, chunk_index)
        print("APPEND :: ", chunk_handle, chunk_locations, err)
        if err:
            return "can't get chunk handle location"

        # Construct dataId with clientId and current timestamp.
        data_id = DataId(self.client_id, time.time())

        # Push data to all replicas' memory.
        err = self.push_data(chunk_locations, data_id, data)
        if err:
            log.error('Data not pushed to all replicas.')
            return "Data not pushed to all replicas."

        # Once data is pushed to all replicas, send append request to the primary.
        primary = self.find_lease_holder(chunk_handle)

        if not primary:
            log.error("Primary chunk server not found.")
            return "Primary chunk server not found."

        # Make Append call to primary chunk server
        primary_cs = rpc_call(primary)
        offset = primary_cs.append(data_id.client_id, data_id.timestamp,
                                   chunk_handle, chunk_index, path,
                                   chunk_locations)
        print("offset = ", offset)
        return offset
示例#13
0
    def find_lease_holder(self, chunk_handle):
        """Return the primary (lease holder) address for *chunk_handle*.

        Serves from the local lease-holder cache when possible; otherwise
        asks the master and caches the answer.  Returns None when the
        master reports an error.
        """
        key = f'{chunk_handle}'
        cached = self.lease_holder_cache.get(key)
        if cached:
            return cached['primary']

        # Cache miss: RPC the master server.
        primary, lease_ends, err = rpc_call(
            self.master_addr).find_lease_holder(chunk_handle)
        if err:
            return None

        self.lease_holder_cache[key] = {
            'primary': primary,
            'lease_ends': lease_ends
        }
        return primary
示例#14
0
    def find_chunk(self, path, chunk_index):
        """Resolve (*path*, *chunk_index*) to (chunk_handle, locations, err).

        Consults the location cache first; on a miss, asks the master
        server and caches a successful answer.
        """
        key = f'{path}:{chunk_index}'
        cached = self.location_cache.get(key, None)
        if cached:
            return cached.chunk_handle, cached.chunk_locations, None

        # Cache miss: ask the master for the chunk's whereabouts.
        ms = rpc_call(self.master_addr)
        chunk_locations, chunk_handle, err = ms.find_locations(
            path, chunk_index)
        if err:
            return None, None, err

        # Save into location cache for subsequent lookups.
        self.location_cache[key] = ChunkInfo(chunk_handle, chunk_locations)
        return chunk_handle, chunk_locations, err
示例#15
0
    def order_chunk_copy_from_peer(self, peer_address, chunk_handle):
        """This RPC is called by master to order a chunkserver to copy some chunks from a peer chunk server
        so as to meet the replication goal for that chunk."""
        peer = rpc_call(peer_address)
        chunk_index, path, length = peer.get_chunk_info_from_peer(
            chunk_handle)

        # Pull the chunk's actual bytes from the peer.
        data, err = peer.read(chunk_handle, 0, length)
        if err:
            log.error(err)
            return err

        # Persist the bytes locally, named after the chunk handle.
        local_name = f"{chunk_handle}"
        err = self.apply_write(local_name, data.data, 0)
        if err:
            return err

        # Register the newly copied chunk's metadata.
        self.report_chunk_info(chunk_handle, chunk_index, path, length, 0)
示例#16
0
def start_chunkserver(master_addr, my_ip, my_port, path):
    """Boot a chunk server: load its metadata, register with the master,
    then serve XML-RPC requests forever."""
    ensure_dir(path)  # make sure this path exists

    address = f'http://{my_ip}:{my_port}'
    metadata_file = f'logs/ck_{my_port}.txt'

    cs = ChunkServer(address, master_addr, path, metadata_file)

    # Replay persisted metadata before announcing ourselves.
    load_metadata(cs)

    # tell master about the presence of this chunk server
    # and also send the list of chunks present here
    # must do this after loading from oplog
    rpc_call(cs.master_addr).notify_master(cs.my_addr,
                                           list(cs.chunks.keys()))

    server = SimpleXMLRPCServer((my_ip, my_port),
                                logRequests=True,
                                allow_none=True)
    server.register_introspection_functions()
    server.register_instance(cs)
    server.serve_forever()
示例#17
0
    def add_chunk(self, path, chunk_index):
        """Ask the master to allocate a new chunk for (*path*, *chunk_index*).

        Returns the master's ``(chunk_handle, chunk_locations, err)`` tuple.
        """
        master = rpc_call(self.master_addr)
        return master.add_chunk(path, chunk_index)
示例#18
0
    def beat(self):
        """Master heartbeat loop: detect dead chunk servers and re-replicate.

        Runs forever.  Every HEARTBEAT_INTERVAL seconds it probes each
        active chunk server; unreachable ones are removed from the active
        set, and each chunk they held is copied from a surviving replica
        to a randomly chosen destination server when the replica count
        has fallen below REPLICATION_FACTOR.
        """
        # FIXME: Simplify
        while True:
            time.sleep(HEARTBEAT_INTERVAL)
            log.debug("Heart Beating %s", self.locations)
            log.debug("Heart Beating %s", self.active_chunk_servers)

            # build list of dead chunk servers
            # by testing a connection to them
            dead_chunk_servers = [cs for cs in self.active_chunk_servers if not self.test_connection(cs)]

            log.debug("Dead chunk servers list = %s", dead_chunk_servers)

            # delete dead chunk server from active chunk servers list
            self.active_chunk_servers.difference_update(dead_chunk_servers)

            # loop over all chunk handles of dead chunk server
            for dead_chunk_server in dead_chunk_servers:
                # get list of chunks that need to be replicated
                chunk_handles = self.chunks_of_chunk_server.get(dead_chunk_server, [])

                for chunk_handle in chunk_handles:
                    chunk_info = self.locations.get(chunk_handle, None)

                    if chunk_info and dead_chunk_server in chunk_info.chunk_locations:
                        # remove dead chunkserver from chunk's chunk_info.chunk_locations
                        chunk_info.chunk_locations.remove(dead_chunk_server)

                        dest_cs = None
                        # if replication is needed
                        # and we have enough number of active chunkservers
                        # then perform replication
                        if REPLICATION_FACTOR - len(chunk_info.chunk_locations) > 0 \
                                and len(
                            self.active_chunk_servers) >= REPLICATION_FACTOR:  # TODO: Probably handle with semaphore

                            while True:
                                # keep looping until we pick a chunk server which does not already contain this chunk
                                # TODO: don't run infinitely, set a fixed max number of times this is executed
                                rand_loc = pick_randomly(self.active_chunk_servers, 1)[0]
                                if rand_loc not in chunk_info.chunk_locations:
                                    dest_cs = rand_loc
                                    break

                        if not dest_cs:
                            # if no valid destination chunkserver found, skip this chunks replication
                            continue

                        # else perform replication
                        # call order_chunk copy_from_peer
                        # source replica: any surviving holder of this chunk
                        peer_address = pick_randomly(chunk_info.chunk_locations, 1)[0]
                        cs_proxy = rpc_call(dest_cs)

                        try:
                            err = cs_proxy.order_chunk_copy_from_peer(peer_address, chunk_handle)
                            if err:
                                log.info("Unable to replicate to %s due to %s", dest_cs, err)
                        except ConnectionRefusedError:
                            log.info("Unable to connect to %s for %s replication", dest_cs, chunk_handle)

                # delete dead_chunk_server from chunks_of_chunk_server_list
                # TODO: donot remove if replication was not performed
                self.chunks_of_chunk_server.pop(dead_chunk_server, None)
示例#19
0
def report_chunk(cs, chunk_info):
    """Report one chunk's metadata from chunk server *cs* to the master."""
    master = rpc_call(cs.master_addr)
    # TODO: receive returned error if any
    master.report_chunk(cs.my_addr, chunk_info.chunk_handle,
                        chunk_info.chunk_index, chunk_info.length,
                        chunk_info.path)