Exemplo n.º 1
0
class DHTOperator(Operator):
    def __init__(self, self_address, home_dir='/tmp/', certfile=None, is_init_node=False, node_name='unknown'):
        """Set up the DHT state of the node and start its helper threads.

        When ``is_init_node`` is true the node registers itself in the
        ranges table as the owner of MIN_HASH..MAX_HASH and finishes the
        constructor in DS_NORMALWORK status; otherwise the status stays
        DS_INITIALIZE. The local range is discovered from the 'dht_range'
        directory under ``home_dir`` (created when it does not exist).
        """
        Operator.__init__(self, self_address, home_dir, certfile, is_init_node, node_name)

        self.status = DS_INITIALIZE
        self.ranges_table = HashRangesTable()
        if is_init_node:
            self.ranges_table.append(MIN_HASH, MAX_HASH, self.self_address)

        # directory that keeps the data of the local hash range
        self.save_path = os.path.join(home_dir, 'dht_range')
        if not os.path.exists(self.save_path):
            os.mkdir(self.save_path)

        self.__dht_range = FSHashRanges.discovery_range(self.save_path)
        self.__split_requests_cache = []
        self.__start_dht_try_count = 0
        self.__init_dht_thread = None
        if is_init_node:
            self.status = DS_NORMALWORK

        checker = self.__check_hash_table_thread = CheckLocalHashTableThread(self)
        checker.setName('%s-CheckLocalHashTableThread'%self.node_name)
        checker.start()

        monitor = self.__monitor_dht_ranges = MonitorDHTRanges(self)
        monitor.setName('%s-MonitorDHTRanges'%self.node_name)
        monitor.start()


    def get_statistic(self):
        """Return the base operator statistic extended with local range data.

        Adds the node status, the range bounds formatted as 40-digit hex
        strings and the range, replicas and free sizes of the local range.
        """
        stat = Operator.get_statistic(self)
        local_range = self.get_dht_range()

        dht_items = (
            ('status', self.status),
            ('range_start', '%040x'% local_range.get_start()),
            ('range_end', '%040x'% local_range.get_end()),
            ('range_size', local_range.get_range_size()),
            ('replicas_size', local_range.get_replicas_size()),
            ('free_size', local_range.get_free_size()),
        )
        for name, value in dht_items:
            stat[name] = value
        return stat


    def on_neigbour_not_respond(self, neighbour_type, neighbour_address):
        """React to a neighbour that stopped responding.

        Only NT_SUPERIOR neighbours are handled: the first table range
        registered for ``neighbour_address`` is passed to _move_range().
        """
        if neighbour_type == NT_SUPERIOR:
            for entry in self.ranges_table.iter_table():
                if entry.node_address != neighbour_address:
                    continue
                self._move_range(entry)
                break


    def _move_range(self, range_obj):
        """Send an UpdateHashRangeTable request that removes ``range_obj``.

        The request carries an empty 'append' list and a 'remove' list with
        the (start, end, node_address) triple of the range; it is sent
        through call_network().
        """
        logger.info('Node %s went from DHT. Updating hash range table on network...'%range_obj.node_address)
        removed_entry = (range_obj.start, range_obj.end, range_obj.node_address)

        request = FabnetPacketRequest(
            method='UpdateHashRangeTable',
            sender=self.self_address,
            parameters={'append': [], 'remove': [removed_entry]})
        self.call_network(request)


    def stop(self):
        """Take the node out of the DHT and stop the operator.

        Sets the DS_DESTROYING status, removes the first table range
        registered for this node (if there is one), stops the base
        operator and then stops and joins both helper threads.
        """
        self.status = DS_DESTROYING
        for entry in self.ranges_table.iter_table():
            if entry.node_address != self.self_address:
                continue
            self._move_range(entry)
            break

        Operator.stop(self)

        # ask both threads to stop first, wait for them afterwards
        workers = (self.__check_hash_table_thread, self.__monitor_dht_ranges)
        for worker in workers:
            worker.stop()
        for worker in workers:
            worker.join()

    def __get_next_max_range(self):
        """Return the longest table range whose owner was not asked yet.

        Ranges owned by nodes listed in the split requests cache are
        skipped; on equal lengths the range met first wins. Returns None
        when no candidate is left.
        """
        largest = None
        for candidate in self.ranges_table.iter_table():
            if candidate.node_address in self.__split_requests_cache:
                continue

            if not largest or largest.length() < candidate.length():
                largest = candidate

        return largest


    def __get_next_range_near(self, start, end):
        """Return the table range holding ``start`` or, failing that, ``end``.

        A range is accepted only when its owner is not listed in the split
        requests cache. Returns None when neither key gives a usable range.
        """
        for key in (start, end):
            candidate = self.ranges_table.find(key)
            if candidate and candidate.node_address not in self.__split_requests_cache:
                return candidate

        return None

    def set_status_to_normalwork(self):
        """Switch the node to DS_NORMALWORK and reset the DHT start state.

        The split requests cache is emptied and the try counter is zeroed.
        """
        logger.info('Changing node status to NORMALWORK')
        self.status = DS_NORMALWORK
        self.__split_requests_cache, self.__start_dht_try_count = [], 0


    def start_as_dht_member(self):
        """Try to obtain a hash range for this node and join the DHT.

        Picks a candidate range from the ranges table and sends a
        SplitRangeRequest to its owner. When no candidate is available the
        method sleeps WAIT_RANGE_TIMEOUT and calls itself again, giving up
        once the try counter equals DHT_CYCLE_TRY_COUNT. When the owner
        rejects the request the method also calls itself again; the
        rejected owner stays in the split requests cache.

        Does nothing while the node is in DS_DESTROYING status.
        """
        if self.status == DS_DESTROYING:
            return

        self.status = DS_INITIALIZE
        dht_range = self.get_dht_range()

        # when True the current local range object is reused for the request
        nochange = False
        curr_start = dht_range.get_start()
        curr_end = dht_range.get_end()

        if dht_range.is_max_range() or self.__split_requests_cache:
            # no usable previous range, or an earlier attempt was already made
            # in this cycle: go for the largest range not asked yet
            new_range = self.__get_next_max_range()
        else:
            new_range = self.__get_next_range_near(curr_start, curr_end)
            if new_range:
                # the table entry has other bounds than the local range:
                # keep the local range and request exactly its bounds
                if (new_range.start != curr_start or new_range.end != curr_end):
                    nochange = True
                # the table entry found is already registered for this node
                if new_range.node_address == self.self_address:
                    self.set_status_to_normalwork()
                    return


        if new_range is None:
            # no candidate range: wait and try again, up to DHT_CYCLE_TRY_COUNT times
            if self.__start_dht_try_count == DHT_CYCLE_TRY_COUNT:
                logger.error('Cant initialize node as a part of DHT')
                self.__start_dht_try_count = 0
                return

            logger.info('No ready range for me on network... So, sleep and try again')
            self.__start_dht_try_count += 1
            # forget the owners asked so far, so they can be asked again
            self.__split_requests_cache = []
            time.sleep(WAIT_RANGE_TIMEOUT)
            return self.start_as_dht_member()

        if nochange:
            new_dht_range = dht_range
        else:
            # request the upper half of the candidate range
            new_dht_range = FSHashRanges(long(new_range.start + new_range.length()/2+1), long(new_range.end), self.save_path)
            self.update_dht_range(new_dht_range)
            new_dht_range.restore_from_trash() #try getting new range data from trash

        # remember the owner so the next attempt does not pick it again
        self.__split_requests_cache.append(new_range.node_address)

        logger.info('Call SplitRangeRequest to %s'%(new_range.node_address,))
        parameters = { 'start_key': new_dht_range.get_start(), 'end_key': new_dht_range.get_end() }
        req = FabnetPacketRequest(method='SplitRangeRequest', sender=self.self_address, parameters=parameters)
        ret_code, ret_msg = self.call_node(new_range.node_address, req)
        if ret_code != RC_OK:
            logger.error('Cant start SplitRangeRequest operation on node %s. Details: %s'%(new_range.node_address, ret_msg))
            return self.start_as_dht_member()


    def get_dht_range(self):
        """Return the current local range object, read under the operator lock."""
        self._lock()
        try:
            current_range = self.__dht_range
        finally:
            self._unlock()
        return current_range

    def update_dht_range(self, new_dht_range):
        """Replace the local range object with ``new_dht_range``.

        The swap is done under the operator lock. Afterwards the previous
        range object is moved to trash and the bounds of the current range
        are logged.
        """
        self._lock()
        try:
            old_dht_range = self.__dht_range
            self.__dht_range = new_dht_range
        finally:
            # release the lock on every exit path, like get_dht_range() does
            self._unlock()

        old_dht_range.move_to_trash()

        dht_range = self.get_dht_range()
        logger.info('New node range: %040x-%040x'%(dht_range.get_start(), dht_range.get_end()))

    def check_dht_range(self):
        """Check that the ranges table registers the local range for this node.

        Skipped while the node is in DS_INITIALIZE status. When the table
        entry found for the local start key is missing, has other bounds or
        belongs to another node, start_as_dht_member() is called.
        """
        if self.status == DS_INITIALIZE:
            return

        local_range = self.get_dht_range()
        start, end = local_range.get_start(), local_range.get_end()

        registered = self.ranges_table.find(start)
        if registered and registered.start == start and registered.end == end \
                and registered.node_address == self.self_address:
            return

        logger.error('DHT range on this node is not found in ranges_table')
        logger.info('Trying reinit node as DHT member...')
        self.start_as_dht_member()
Exemplo n.º 2
0
class DHTOperator(Operator):
    OPTYPE = 'DHT'

    def __init__(self, self_address, home_dir='/tmp/', key_storage=None, \
                        is_init_node=False, node_name='unknown', config=None):
        """Set up the DHT state of the node and start its helper threads.

        config -- optional mapping with settings that override
                  DEFAULT_DHT_CONFIG; None or an empty mapping keeps the
                  defaults.

        When ``is_init_node`` is true the node registers itself in the
        ranges table as the owner of MIN_HASH..MAX_HASH and finishes the
        constructor in DS_NORMALWORK status; otherwise the status stays
        DS_INITIALIZE. The local range is discovered from the 'dht_range'
        directory under ``home_dir`` (created when it does not exist).
        """
        cur_cfg = {}
        cur_cfg.update(DEFAULT_DHT_CONFIG)
        # None replaces the former mutable ``{}`` default argument
        if config:
            cur_cfg.update(config)
        Operator.__init__(self, self_address, home_dir, key_storage, \
                                        is_init_node, node_name, cur_cfg)

        self.status = DS_INITIALIZE
        self.ranges_table = HashRangesTable()
        if is_init_node:
            self.ranges_table.append(MIN_HASH, MAX_HASH, self.self_address)

        # directory that keeps the data of the local hash range
        self.save_path = os.path.join(home_dir, 'dht_range')
        if not os.path.exists(self.save_path):
            os.mkdir(self.save_path)

        self.__split_requests_cache = []
        self.__dht_range = FSHashRanges.discovery_range(self.save_path, ret_full=is_init_node)
        self.__start_dht_try_count = 0
        self.__init_dht_thread = None
        if is_init_node:
            self.status = DS_NORMALWORK

        self.__check_hash_table_thread = CheckLocalHashTableThread(self)
        self.__check_hash_table_thread.setName('%s-CheckLocalHashTableThread'%self.node_name)
        self.__check_hash_table_thread.start()

        self.__monitor_dht_ranges = MonitorDHTRanges(self)
        self.__monitor_dht_ranges.setName('%s-MonitorDHTRanges'%self.node_name)
        self.__monitor_dht_ranges.start()

    def get_status(self):
        """Return the current DHT status of the node (the ``status`` attribute)."""
        return self.status

    def on_statisic_request(self):
        """Return the base operator statistic with a 'DHTInfo' section added.

        The section holds the node status, the local range bounds formatted
        as 40-digit hex strings and the range, replicas and free sizes
        (the free size also in percents).
        """
        stat = Operator.on_statisic_request(self)
        local_range = self.get_dht_range()

        stat['DHTInfo'] = {
            'status': self.status,
            'range_start': '%040x'% local_range.get_start(),
            'range_end': '%040x'% local_range.get_end(),
            'range_size': local_range.get_range_size(),
            'replicas_size': local_range.get_replicas_size(),
            'free_size': local_range.get_free_size(),
            'free_size_percents': local_range.get_free_size_percents(),
        }
        return stat

    def _move_range(self, range_obj):
        """Send an UpdateHashRangeTable request that removes ``range_obj``.

        The request carries an empty 'append' list and a 'remove' list with
        the (start, end, node_address) triple of the range; it is sent
        through call_network().
        """
        logger.info('Node %s went from DHT. Updating hash range table on network...'%range_obj.node_address)
        removed_entry = (range_obj.start, range_obj.end, range_obj.node_address)

        request = FabnetPacketRequest(
            method='UpdateHashRangeTable',
            sender=self.self_address,
            parameters={'append': [], 'remove': [removed_entry]})
        self.call_network(request)

    def _take_range(self, range_obj):
        """Send an UpdateHashRangeTable request that appends ``range_obj``.

        The request carries an 'append' list with the
        (start, end, node_address) triple of the range and an empty
        'remove' list; it is sent through call_network().
        """
        logger.info('Take node old range. Updating hash range table on network...')
        added_entry = (range_obj.start, range_obj.end, range_obj.node_address)

        request = FabnetPacketRequest(
            method='UpdateHashRangeTable',
            sender=self.self_address,
            parameters={'append': [added_entry], 'remove': []})
        self.call_network(request)


    def stop_inherited(self):
        """Take the node out of the DHT and stop the helper threads.

        Sets the DS_DESTROYING status, removes the first table range
        registered for this node (if there is one), asks both helper
        threads to stop, sleeps Config.DHT_STOP_TIMEOUT and joins them.
        """
        self.status = DS_DESTROYING
        for entry in self.ranges_table.iter_table():
            if entry.node_address != self.self_address:
                continue
            self._move_range(entry)
            break

        workers = (self.__check_hash_table_thread, self.__monitor_dht_ranges)
        for worker in workers:
            worker.stop()
        time.sleep(Config.DHT_STOP_TIMEOUT)
        for worker in workers:
            worker.join()

    def __get_next_max_range(self):
        """Pick the range this node should request next.

        A table range already registered for this node is returned as is,
        as soon as the iteration reaches it. Otherwise the longest range
        whose owner is not in the split requests cache is chosen and its
        upper half is returned as a new HashRange that keeps the owner
        address. Returns None when no candidate is left.
        """
        largest = None
        for candidate in self.ranges_table.iter_table():
            if candidate.node_address == self.self_address:
                return candidate

            if candidate.node_address in self.__split_requests_cache:
                continue

            if not largest or largest.length() < candidate.length():
                largest = candidate

        if not largest:
            return None

        upper_half_start = long(largest.start+largest.length()/2+1)
        return HashRange(upper_half_start, long(largest.end), largest.node_address)

    def __normalize_range_request(self, c_start, c_end, f_range):
        """Cut the requested range [c_start, c_end] down to what ``f_range`` holds.

        Two candidates are built: c_start..f_range.end when ``f_range``
        contains c_start, and f_range.start..c_end when it contains c_end.
        When both exist the shorter one is returned (the second one on
        equal lengths); when neither exists None is returned.
        """
        owner = f_range.node_address
        tail_part = HashRange(c_start, f_range.end, owner) if f_range.contain(c_start) else None
        head_part = HashRange(f_range.start, c_end, owner) if f_range.contain(c_end) else None

        if not tail_part:
            return head_part
        if not head_part:
            return tail_part

        if tail_part.length() < head_part.length():
            return tail_part
        return head_part

    def __get_next_range_near(self, start, end):
        """Find the part of [start, end] that can be requested from the table.

        The table ranges holding ``start`` and ``end`` are examined; owners
        listed in the split requests cache are ignored. When one table
        range contains both keys the result for that range is returned
        (None when its owner is in the cache). Otherwise the longer of the
        normalized requests is returned, and when there is none a HashRange
        start..end registered for this node is returned.
        """
        candidate = None
        start_owner = self.ranges_table.find(start)
        if start_owner and start_owner.node_address not in self.__split_requests_cache:
            candidate = self.__normalize_range_request(start, end, start_owner)

        # a single table range covers both keys: nothing else to look at
        if start_owner and start_owner.contain(end):
            return candidate

        # the requested range is split between two other table ranges
        end_owner = self.ranges_table.find(end)
        if end_owner and end_owner.node_address not in self.__split_requests_cache:
            from_end = self.__normalize_range_request(start, end, end_owner)
            if not candidate:
                candidate = from_end
            elif from_end and from_end.length() > candidate.length():
                candidate = from_end

        if candidate:
            return candidate
        return HashRange(start, end, self.self_address)

    def set_status_to_normalwork(self, save_range=False):
        """Switch the node to DS_NORMALWORK and reset the DHT start state.

        The split requests cache is emptied and the try counter is zeroed.
        With ``save_range`` set, save_range() is called on the local range.
        """
        logger.info('Changing node status to NORMALWORK')
        self.status = DS_NORMALWORK
        self.__split_requests_cache, self.__start_dht_try_count = [], 0
        if not save_range:
            return
        self.get_dht_range().save_range()

    def start_as_dht_member(self):
        """Try to obtain a hash range for this node and join the DHT.

        Picks a candidate range from the ranges table. When the candidate
        is registered for this node it is announced with _take_range() and
        the node goes to normal work; otherwise a SplitRangeRequest is sent
        to the candidate's owner. When no candidate is available the method
        sleeps Config.WAIT_RANGE_TIMEOUT and calls itself again, giving up
        once the try counter equals Config.DHT_CYCLE_TRY_COUNT.

        Does nothing while the node is in DS_DESTROYING status.
        """
        if self.status == DS_DESTROYING:
            return

        self.status = DS_INITIALIZE
        dht_range = self.get_dht_range()

        curr_start = dht_range.get_start()
        curr_end = dht_range.get_end()

        last_range = dht_range.get_last_range()
        if last_range and not self.__split_requests_cache:
            # first attempt in this cycle: look near the (start, end) pair
            # returned by get_last_range()
            new_range = self.__get_next_range_near(last_range[0], last_range[1])
        elif dht_range.is_max_range() or self.__split_requests_cache:
            # an owner was already asked in this cycle, or the local range
            # is the maximal one: go for the largest range not asked yet
            new_range = self.__get_next_max_range()
        else:
            new_range = self.__get_next_range_near(curr_start, curr_end)

        if new_range is None:
            # no candidate range: wait and try again, up to
            # Config.DHT_CYCLE_TRY_COUNT times
            if self.__start_dht_try_count == Config.DHT_CYCLE_TRY_COUNT:
                logger.error('Cant initialize node as a part of DHT')
                self.__start_dht_try_count = 0
                return

            logger.info('No ready range for me on network... So, sleep and try again')
            self.__start_dht_try_count += 1
            # forget the owners asked so far, so they can be asked again
            self.__split_requests_cache = []
            time.sleep(Config.WAIT_RANGE_TIMEOUT)
            return self.start_as_dht_member()

        if (new_range.start == curr_start and new_range.end == curr_end):
            # candidate has the bounds of the local range: keep the range object
            new_dht_range = dht_range
        else:
            new_dht_range = FSHashRanges(long(new_range.start), long(new_range.end), self.save_path)
            self.update_dht_range(new_dht_range)
            new_dht_range.restore_from_reservation() #try getting new range data from reservation

        if new_range.node_address == self.self_address:
            # the candidate is registered for this node: announce it, no split needed
            self._take_range(new_range)
            self.set_status_to_normalwork()
            return

        # remember the owner so the next attempt does not pick it again
        self.__split_requests_cache.append(new_range.node_address)

        logger.info('Call SplitRangeRequest [%040x-%040x] to %s'% \
                (new_dht_range.get_start(), new_dht_range.get_end(), new_range.node_address,))
        parameters = { 'start_key': new_dht_range.get_start(), 'end_key': new_dht_range.get_end() }
        req = FabnetPacketRequest(method='SplitRangeRequest', sender=self.self_address, parameters=parameters)
        # NOTE(review): the result of call_node() is not checked here - confirm
        # that a failed request is handled elsewhere
        self.call_node(new_range.node_address, req)

    def get_dht_range(self):
        """Return the current local range object, read under the operator lock."""
        self._lock()
        try:
            current_range = self.__dht_range
        finally:
            self._unlock()
        return current_range

    def update_dht_range(self, new_dht_range):
        """Replace the local range object with ``new_dht_range``.

        The swap is done under the operator lock. Afterwards the previous
        range object is moved to reservation and the bounds of the current
        range are logged.
        """
        self._lock()
        try:
            old_dht_range = self.__dht_range
            self.__dht_range = new_dht_range
        finally:
            # release the lock on every exit path, like get_dht_range() does
            self._unlock()

        old_dht_range.move_to_reservation()

        dht_range = self.get_dht_range()
        logger.info('New node range: %040x-%040x'%(dht_range.get_start(), dht_range.get_end()))

    def check_dht_range(self, reinit=True):
        """Verify that the local range matches its entry in the ranges table.

        Nothing is checked while the node is in DS_INITIALIZE status or
        while the local range has subranges.

        reinit -- when true, a mismatching table entry triggers
                  start_as_dht_member(); a missing table entry triggers it
                  regardless of this flag.

        Returns True when the local range is missing from the table or
        differs from the entry found, None otherwise.
        """
        if self.status == DS_INITIALIZE:
            return

        dht_range = self.get_dht_range()
        if dht_range.get_subranges():
            return

        start = dht_range.get_start()
        end = dht_range.get_end()

        # look the local range up by its start key, then by its end key
        range_obj = self.ranges_table.find(start)
        if not range_obj:
            range_obj = self.ranges_table.find(end)

        if range_obj and range_obj.start == start and range_obj.end == end \
                and range_obj.node_address == self.self_address:
            return

        msg = 'Invalid self range!'
        if range_obj:
            msg += ' hash table range - [%040x-%040x]%s... my range - [%040x-%040x]%s'%\
                    (range_obj.start, range_obj.end, range_obj.node_address, start, end, self.self_address)
        else:
            # leading space keeps the two sentences apart in the log line
            msg += ' Not found in hash table'
        logger.info(msg)

        if (not range_obj) or reinit:
            logger.warning('DHT range on this node is not found in ranges_table')
            if range_obj:
                logger.info('Self range: %040x-%040x, In hash table: %040x-%040x(%s)'%\
                    (start, end, range_obj.start, range_obj.end, range_obj.node_address))
            logger.info('Trying reinit node as DHT member...')
            self.start_as_dht_member()
        return True

    def check_near_range(self, reinit_dht=False):
        """Extend the local range over a gap in the ranges table next to it.

        Runs only in DS_NORMALWORK status and only when check_dht_range()
        reports no problem with the local range. Two cases are handled:
        a gap right after the local range end, and a gap between MIN_HASH
        and the first table range when that range belongs to this node.
        In both cases the local range is extended and an
        UpdateHashRangeTable request is sent through call_network().

        reinit_dht -- passed to check_dht_range() as its ``reinit`` flag.
        """
        if self.status != DS_NORMALWORK:
            return

        failed_range = self.check_dht_range(reinit=reinit_dht)
        if failed_range:
            return

        # NOTE(review): get_dht_range() and update_dht_range() take the same
        # lock again while it is held here - presumably it is reentrant; confirm
        self._lock()
        try:
            self_dht_range = self.get_dht_range()

            if self_dht_range.get_end() != MAX_HASH:
                next_range = self.ranges_table.find(self_dht_range.get_end()+1)
                if not next_range:
                    # nobody holds the key right after the local range:
                    # extend up to the next existing range or to MAX_HASH
                    next_exists_range = self.ranges_table.find_next(self_dht_range.get_end()-1)
                    if next_exists_range:
                        end = next_exists_range.start-1
                    else:
                        end = MAX_HASH
                    new_dht_range = self_dht_range.extend(self_dht_range.get_end()+1, end)
                    self.update_dht_range(new_dht_range)

                    # replace the old table entry of this node with the extended one
                    rm_lst = [(self_dht_range.get_start(), self_dht_range.get_end(), self.self_address)]
                    append_lst = [(new_dht_range.get_start(), new_dht_range.get_end(), self.self_address)]

                    logger.info('Extended range by next neighbours')

                    req = FabnetPacketRequest(method='UpdateHashRangeTable', \
                            sender=self.self_address, parameters={'append': append_lst, 'remove': rm_lst})
                    self.call_network(req)
                    return

            first_range = self.ranges_table.find(MIN_HASH)
            if not first_range:
                # nobody holds MIN_HASH: the first existing range takes the gap,
                # but only when it belongs to this node
                first_range = self.ranges_table.get_first()
                if not first_range:
                    return
                if first_range.node_address == self.self_address:
                    new_dht_range = self_dht_range.extend(MIN_HASH, first_range.start-1)
                    self.update_dht_range(new_dht_range)
                    rm_lst = [(self_dht_range.get_start(), self_dht_range.get_end(), self.self_address)]
                    append_lst = [(new_dht_range.get_start(), new_dht_range.get_end(), self.self_address)]

                    logger.info('Extended range by first range')

                    req = FabnetPacketRequest(method='UpdateHashRangeTable', \
                            sender=self.self_address, parameters={'append': append_lst, 'remove': rm_lst})
                    self.call_network(req)
        finally:
            self._unlock()


    def extend_range(self, subrange_size, start_key, end_key):
        """Extend the local range with the subrange [start_key, end_key].

        subrange_size -- size of the subrange data (converted with int())
                         used to estimate how full the node would become.

        The table range holding ``start_key`` gives the subrange away: the
        request sent through call_network() replaces that range with its
        remaining part and replaces the old entry of this node with the
        extended one.

        Raises Exception when the local range has subranges, when the
        estimated data percents reach Config.MAX_USED_SIZE_PERCENTS or
        when no table range holds ``start_key``.
        """
        local_range = self.get_dht_range()
        if local_range.get_subranges():
            raise Exception('Local range is spliited at this time...')

        estimated_perc = local_range.get_estimated_data_percents(int(subrange_size))
        if estimated_perc >= Config.MAX_USED_SIZE_PERCENTS:
            raise Exception('Subrange is so big for this node ;(')

        donor = self.ranges_table.find(start_key)
        if donor is None:
            raise Exception('No "parent" range found for subrange [%040x-%040x] in distributed ranges table'%(start_key, end_key))

        extended = local_range.extend(start_key, end_key)

        # what stays with the donor after the subrange is cut off
        if donor.start < start_key:
            donor_rest = (donor.start, start_key-1, donor.node_address)
        else:
            donor_rest = (end_key+1, donor.end, donor.node_address)

        append_lst = [
            (extended.get_start(), extended.get_end(), self.self_address),
            donor_rest,
        ]
        rm_lst = [
            (local_range.get_start(), local_range.get_end(), self.self_address),
            (donor.start, donor.end, donor.node_address),
        ]

        self.update_dht_range(extended)

        request = FabnetPacketRequest(
            method='UpdateHashRangeTable',
            sender=self.self_address,
            parameters={'append': append_lst, 'remove': rm_lst})
        self.call_network(request)

    def get_data_block_path(self, key, is_replica):
        """Return what the local range's get_path() gives for ``key`` and ``is_replica``."""
        return self.get_dht_range().get_path(key, is_replica)

    def delete_data_block(self, key, is_replica, user_id, carefully_delete=True):
        """Forward the deletion of a data block to the local range.

        All arguments are passed unchanged to the range's
        delete_data_block(); nothing is returned.
        """
        self.get_dht_range().delete_data_block(key, is_replica, user_id, carefully_delete)

    def get_tempfile(self):
        """Return the result of tempfile() called on the local range."""
        return self.get_dht_range().tempfile()

    def join_subranges(self):
        """Call join_subranges() on the local range; nothing is returned."""
        self.get_dht_range().join_subranges()

    def put_data_block(self, key, tempfile_path, is_replica, carefully_save):
        """Forward the saving of a data block to the local range.

        All arguments are passed unchanged to the range's put(); nothing
        is returned.
        """
        self.get_dht_range().put(key, tempfile_path, is_replica, carefully_save)

    def get_subranges(self):
        """Return the result of get_subranges() called on the local range.

        The value was computed but dropped before, so callers always got
        None; it is now returned like the other range accessors do.
        """
        return self.get_dht_range().get_subranges()

    def send_subrange_data(self, node_address):
        """Transfer the split-off subrange to ``node_address``.

        The local range must have subranges; they are unpacked as
        (ret_range, new_range). Every (key, data) pair of ``ret_range`` is
        sent with a synchronous PutDataBlock request. After that
        ``new_range`` is saved and becomes the local range, ``ret_range``
        is destroyed and an UpdateHashRangeTable request registering both
        ranges is sent through call_network().

        Raises Exception when the range has no subranges. Any error during
        the transfer is logged, the subranges are joined back and the
        error is raised again.
        """
        dht_range = self.get_dht_range()
        subranges = dht_range.get_subranges()
        if not subranges:
            raise Exception('Range is not splitted!')

        ret_range, new_range = subranges
        try:
            logger.debug('Starting subrange data transfering to %s'% node_address)
            for key, data in ret_range.iter_range():
                params = {'key': key, 'carefully_save': True}
                req = FabnetPacketRequest(method='PutDataBlock', \
                                    sender=self.self_address, binary_data=data, sync=True,
                                    parameters=params)

                resp = self.call_node(node_address, req)
                # a non-zero return code means the remote node refused the block
                if resp.ret_code:
                    raise Exception('Init PutDataBlock operation on %s error. Details: %s'%(node_address, resp.ret_message))

            new_range.save_range()
            self.update_dht_range(new_range)
        except Exception as err:
            # 'as' form is valid on Python 2.6+ and 3.x, unlike 'except X, err'
            logger.error('send_subrange_data error: %s'%err)
            # undo the split so the node keeps serving the whole range
            dht_range.join_subranges()
            raise err

        ret_range._destroy(force=True)
        append_lst = [(ret_range.get_start(), ret_range.get_end(), node_address)]
        append_lst.append((new_range.get_start(), new_range.get_end(), self.self_address))
        rm_lst = [(dht_range.get_start(), dht_range.get_end(), self.self_address)]
        req = FabnetPacketRequest(method='UpdateHashRangeTable', \
                    sender=self.self_address,\
                    parameters={'append': append_lst, 'remove': rm_lst})
        self.call_network(req)