def run(self):
    # Background monitor loop: periodically processes foreign data blocks
    # and checks range free size until the thread is stopped.
    logger.info('started')
    while True:
        # Interruptible sleep: wait MONITOR_DHT_RANGES_TIMEOUT seconds in 1s steps.
        for i in xrange(int(Config.MONITOR_DHT_RANGES_TIMEOUT)):
            if self.stopped.is_set():
                break
            if self.interrupt.is_set():
                # presumably set by a force() call - start the iteration
                # immediately (TODO confirm against the setter of interrupt)
                self.interrupt.clear()
                break
            time.sleep(1)

        if self.stopped.is_set():
            break
        if self.operator.status == DS_INITIALIZE:
            # DHT member is still initializing - skip this iteration
            continue

        try:
            logger.debug('MonitorDHTRanges iteration...')
            self._process_foreign()
            if self.stopped.is_set():
                break
            self._check_range_free_size()
            if self.stopped.is_set():
                break
        except Exception, err:
            # route the traceback output through the logger object
            logger.write = logger.debug
            traceback.print_exc(file=logger)
            logger.error('[MonitorDHTRanges] %s'% err)
def callback(self, packet, sender=None):
    """Process a CheckHashRangeTable response from a remote node.

    @param packet - object of FabnetPacketResponse class
    @param sender - address of sender node.
    If sender == None then current node is operation initiator
    @return object of FabnetPacketResponse
            that should be resended to current node requestor
            or None for disabling packet resending
    """
    logger.debug('CheckHashRangeTable response from %s: %s %s'%(packet.from_node, packet.ret_code, packet.ret_message))
    if self.operator.get_status() == DS_DESTROYING:
        return

    if packet.ret_code == RC_DONT_STARTED:
        # remote DHT member is not started - drop its range and re-check neighbours
        self.operator.remove_node_range(packet.from_node)
        time.sleep(self.operator.get_config_value('WAIT_DHT_TABLE_UPDATE'))
        self.operator.check_near_range()
    elif packet.ret_code == RC_OK:
        self.operator.check_near_range()
    elif packet.ret_code == RC_ERROR:
        logger.error('CheckHashRangeTable failed on %s. Details: %s %s'%(packet.from_node, \
                packet.ret_code, packet.ret_message))
    elif packet.ret_code == RC_NEED_UPDATE:
        # local ranges table is out of date - pull it from the remote node
        self._get_ranges_table(packet.from_node, packet.ret_parameters['mod_index'], \
                packet.ret_parameters['ranges_count'], packet.ret_parameters.get('force', False))
def run(self):
    # Topology-discovery loop: sleep DISCOVERY_TOPOLOGY_TIMEOUT seconds
    # (interruptible in 1s steps), then start a TopologyCognition pass.
    logger.info('Thread started!')
    while True:
        try:
            for i in xrange(Config.DISCOVERY_TOPOLOGY_TIMEOUT):
                if self.stopped.is_set():
                    break
                time.sleep(1)

            if self.stopped.is_set():
                break

            self.operator.check_database()

            from_addr = self.next_discovery_node()
            if from_addr:
                logger.info('Starting topology discovery from %s...'%from_addr)
                # discovery initiated from a remote node - request rebalance
                params = {"need_rebalance": 1}
            else:
                logger.info('Starting topology discovery from this node...')
                params = {}

            packet = FabnetPacketRequest(method='TopologyCognition', parameters=params)
            self.operator.call_network(packet, from_addr)
        except Exception, err:
            logger.error(str(err))
def send_subrange_data(self, node_address): dht_range = self.get_dht_range() subranges = dht_range.get_subranges() if not subranges: raise Exception('Range is not splitted!') ret_range, new_range = subranges try: logger.debug('Starting subrange data transfering to %s'% node_address) for key, data in ret_range.iter_range(): params = {'key': key, 'carefully_save': True} req = FabnetPacketRequest(method='PutDataBlock', \ sender=self.self_address, binary_data=data, sync=True, parameters=params) resp = self.call_node(node_address, req) if resp.ret_code: raise Exception('Init PutDataBlock operation on %s error. Details: %s'%(node_address, resp.ret_message)) new_range.save_range() self.update_dht_range(new_range) except Exception, err: logger.error('send_subrange_data error: %s'%err) dht_range.join_subranges() raise err
def run(self):
    # Monitor loop: periodically checks range free space, processes the
    # reservation range and replicas until the thread is stopped.
    logger.info('started')
    while True:
        # interruptible sleep for MONITOR_DHT_RANGES_TIMEOUT seconds
        for i in xrange(Config.MONITOR_DHT_RANGES_TIMEOUT):
            if self.stopped.is_set():
                break
            time.sleep(1)

        if self.stopped.is_set():
            break

        try:
            logger.debug('MonitorDHTRanges iteration...')
            self._check_range_free_size()
            if self.stopped.is_set():
                break
            self._process_reservation_range()
            if self.stopped.is_set():
                break
            self._process_replicas()
            if self.stopped.is_set():
                break
        except Exception, err:
            import traceback
            # route the traceback output through the logger object
            logger.write = logger.debug
            traceback.print_exc(file=logger)
            logger.error('[MonitorDHTRanges] %s'% err)
def process(self, packet): """In this method should be implemented logic of processing reuqest packet from sender node @param packet - object of FabnetPacketRequest class @return object of FabnetPacketResponse or None for disabling packet response to sender """ if self.operator.get_status() == DS_DESTROYING: return _, icnt = self.operator.get_ranges_table_status() if icnt == 0: logger.debug('Received update for hash ranges table, but it is not initialized yet. Skip operation...') return append_lst = packet.parameters.get('append', []) rm_lst = packet.parameters.get('remove', []) rm_obj_list = [HashRange(r[0], r[1], r[2]) for r in rm_lst] ap_obj_list = [HashRange(a[0], a[1], a[2]) for a in append_lst] self._lock() try: self.operator.apply_ranges_table_changes(rm_obj_list, ap_obj_list) logger.debug('RM RANGE: %s'%', '.join([r.to_str() for r in rm_obj_list])) logger.debug('APP RANGE: %s'%', '.join([a.to_str() for a in ap_obj_list])) except Exception, err: logger.error('UpdateHashRangeTable error: %s'%err) if not packet.sender: self.operator.check_dht_range(False) #reinit=False
def run(self): logger.info('Thread started!') while not self.stopped.is_set(): dt = 0 try: t0 = datetime.now() logger.debug('Collecting %s nodes statistic...'%self.check_status) nodeaddrs = self.operator.get_nodes_list(self.check_status) for nodeaddr in nodeaddrs: logger.debug('Get statistic from %s'%nodeaddr) packet_obj = FabnetPacketRequest(method='NodeStatistic', sync=True) ret_packet = self.client.call_sync(nodeaddr, packet_obj) if self.check_status == UP and ret_packet.ret_code: logger.warning('Node with address %s does not response... Details: %s'%(nodeaddr, ret_packet)) self.operator.change_node_status(nodeaddr, DOWN) else: stat = json.dumps(ret_packet.ret_parameters) self.operator.update_node_stat(nodeaddr, stat) dt = total_seconds(datetime.now() - t0) logger.info('Nodes (with status=%s) stat is collected. Processed secs: %s'%(self.check_status, dt)) except Exception, err: logger.error(str(err)) finally:
def __process_data_block(self, key, path, dbct):
    # Validate the data block stored at `path` against its header and,
    # for every derived key that falls into the checked range, verify the
    # corresponding master/replica copy via __check_data_block().
    self.__processed_local_blocks += 1
    with ThreadSafeDataBlock(path) as db:
        try:
            header = db.get_header()
            data_keys = KeyUtils.get_all_keys(header.master_key, header.replica_count)
            if dbct == FSMappedDHTRange.DBCT_MASTER and key != header.master_key:
                raise Exception('Master key is invalid: %s != %s'%(key, header.master_key))
            elif dbct == FSMappedDHTRange.DBCT_REPLICA:
                if key not in data_keys:
                    raise Exception('Replica key is invalid: %s'%key)
        except Exception, err:
            # corrupted or mismatched block - count it and skip
            self.__invalid_local_blocks += 1
            logger.error('[RepairDataBlocks] %s'%err)
            return

        # data_keys[0] is the master key; the remaining entries are replica keys
        if dbct == FSMappedDHTRange.DBCT_REPLICA and self._in_check_range(data_keys[0]):
            self.__check_data_block(key, db, dbct, data_keys[0], \
                    header, FSMappedDHTRange.DBCT_MASTER)

        for repl_key in data_keys[1:]:
            if repl_key == key:
                continue
            if self._in_check_range(repl_key):
                self.__check_data_block(key, db, dbct, repl_key, \
                        header, FSMappedDHTRange.DBCT_REPLICA)
def process(self, packet): """In this method should be implemented logic of processing reuqest packet from sender node @param packet - object of FabnetPacketRequest class @return object of FabnetPacketResponse or None for disabling packet response to sender """ releases = packet.parameters.get('releases', {}) optype = self.operator.get_type().lower() for n_type, urls in releases.items(): if n_type.lower() != optype: continue if type(urls) not in (list, tuple): urls = [urls] try: self.__upgrade_node(urls, packet.parameters.get('force', False)) except Exception, err: self._throw_event(ET_ALERT, 'UpgradeNodeOperation failed', err) logger.error('[UpgradeNodeOperation] %s'%err) return FabnetPacketResponse(ret_code=RC_UPGRADE_ERROR, ret_message=err) return FabnetPacketResponse()
def process(self, packet):
    """Log a network notification and dispatch it to on_network_notify().

    @param packet - object of FabnetPacketRequest class
    @return object of FabnetPacketResponse
            or None for disabling packet response to sender
    """
    try:
        event_type = packet.parameters.get('event_type', None)
        event_provider = packet.parameters.get('event_provider', None)
        event_topic = packet.parameters.get('event_topic', None)
        if event_provider is None:
            raise Exception('event_provider does not found!')
        event_message = packet.parameters.get('event_message', None)

        if packet.sender is None:
            #this is sender
            if event_type == ET_ALERT:
                logger.warning('[ALERT][%s] *%s* %s'%(event_provider, event_topic, event_message))
            elif event_type == ET_INFO:
                logger.info('[INFORMATION][%s] *%s* %s'%(event_provider, event_topic, event_message))
            else:
                logger.info('[NOTIFICATION.%s][%s] *%s* %s'%(event_type, event_provider, event_topic, event_message))

        # NOTE(review): reconstructed at try-level so every node processes
        # the notification, not only the initiator - confirm original nesting
        self.on_network_notify(event_type, event_provider, event_topic, event_message)
    except Exception, err:
        logger.error('[NotifyOperation] %s'%err)
def notify(self, nodeaddr, event_type, event_topic, event_message):
    """Send a NotifyOperation request to nodeaddr and log any call failure."""
    params = {'event_type': str(event_type),
              'event_message': str(event_message),
              'event_topic': str(event_topic),
              'event_provider': self.operator.get_self_address()}
    request = FabnetPacketRequest(method='NotifyOperation', parameters=params)

    ret_code, ret_msg = self.client.call(nodeaddr, request)
    if ret_code:
        logger.error('Can not call NotifyOperation: %s'%ret_msg)
def get_node_version(self): if not os.path.exists(VERSION_FILE): UpgradeNodeOperation.update_node_info() try: return open(VERSION_FILE).read().strip() except Exception, err: logger.error('Cant read version from file %s. Detials: %s'%(VERSION_FILE, err)) return 'unknown'
def check_database(self):
    # Re-establish the database connection when the configured connection
    # string has changed; no-op while the current connection is still valid.
    # NOTE(review): this chunk only closes the stale connection - the
    # reconnect logic presumably follows below this view; confirm.
    db_conn_str = Config.get('db_conn_str', self.OPTYPE)
    if self.__db_api and db_conn_str == self.__db_conn_str:
        return

    if self.__db_api:
        try:
            self.__db_api.close()
        except Exception, err:
            # best-effort close: log and continue
            logger.error('DBAPI closing failed with error "%s"'%err)
def run(self):
    # Run the underlying server loop until it terminates; on unexpected
    # failure set the error flag and dump the traceback via the logger.
    logger.info('Thread started!')
    try:
        self.__server.serve_forever()
    except Exception, err:
        self.is_error.set()
        import traceback
        # route the traceback output through the logger object
        logger.write = logger.info
        traceback.print_exc(file=logger)
        logger.error('Unexpected error: %s'%err)
def update_node_info(cls):
    # Detect the node version by running `git describe` inside GIT_HOME.
    # NOTE(review): in this chunk `ver` is computed but unused and
    # old_curdir is saved but not restored - the chunk appears truncated;
    # presumably the version is written out and the cwd restored below.
    ver = 'unknown'
    old_curdir = os.path.abspath(os.curdir)
    try:
        os.chdir(GIT_HOME)
        ret, cout, cerr = run_command_ex(['git', 'describe', '--always', '--tag'])
        if ret != 0:
            raise Exception(cerr)
        ver = cout.strip()
    except Exception, err:
        logger.error('"git describe --always --tag" failed: %s'%err)
def stop(self):
    # Stop the operator: signal worker threads, stop the neighbours
    # checker and unbind this node from its neighbours.
    try:
        logger.info('stopping operator...')
        self.stopped.set()
        self.__check_neighbours_thread.stop()

        uppers = self.get_neighbours(NT_UPPER)
        superiors = self.get_neighbours(NT_SUPERIOR)
        # NOTE(review): the neighbour types look intentionally swapped -
        # our upper is the remote side's superior and vice versa; confirm.
        self.__unbind_neighbours(uppers, NT_SUPERIOR)
        self.__unbind_neighbours(superiors, NT_UPPER)
    except Exception, err:
        logger.error('Operator stopping failed. Details: %s'%err)
def start_as_dht_member(self):
    # Join the DHT ring: pick a candidate hash range (previously owned
    # range, a nearby range, or the biggest free one), then either take it
    # directly or ask its current owner to split via SplitRangeRequest.
    if self.status == DS_DESTROYING:
        return
    self.status = DS_INITIALIZE

    dht_range = self.get_dht_range()

    curr_start = dht_range.get_start()
    curr_end = dht_range.get_end()

    last_range = dht_range.get_last_range()
    if last_range and not self.__split_requests_cache:
        # first attempt - try to reclaim the previously owned range
        new_range = self.__get_next_range_near(last_range[0], last_range[1])
    elif dht_range.is_max_range() or self.__split_requests_cache:
        new_range = self.__get_next_max_range()
    else:
        new_range = self.__get_next_range_near(curr_start, curr_end)

    if new_range is None:
        # no suitable range found - retry after a pause, bounded by
        # DHT_CYCLE_TRY_COUNT attempts
        if self.__start_dht_try_count == Config.DHT_CYCLE_TRY_COUNT:
            logger.error('Cant initialize node as a part of DHT')
            self.__start_dht_try_count = 0
            return

        logger.info('No ready range for me on network... So, sleep and try again')
        self.__start_dht_try_count += 1
        self.__split_requests_cache = []
        time.sleep(Config.WAIT_RANGE_TIMEOUT)
        return self.start_as_dht_member()

    if (new_range.start == curr_start and new_range.end == curr_end):
        new_dht_range = dht_range
    else:
        new_dht_range = FSHashRanges(long(new_range.start), long(new_range.end), self.save_path)
        self.update_dht_range(new_dht_range)

        new_dht_range.restore_from_reservation() #try getting new range data from reservation

    if new_range.node_address == self.self_address:
        # the selected range is already assigned to this node - take it
        self._take_range(new_range)
        self.set_status_to_normalwork()
        return

    self.__split_requests_cache.append(new_range.node_address)

    logger.info('Call SplitRangeRequest [%040x-%040x] to %s'% \
            (new_dht_range.get_start(), new_dht_range.get_end(), new_range.node_address,))
    parameters = { 'start_key': new_dht_range.get_start(), 'end_key': new_dht_range.get_end() }
    req = FabnetPacketRequest(method='SplitRangeRequest', sender=self.self_address, parameters=parameters)
    self.call_node(new_range.node_address, req)
def start_as_dht_member(self):
    # Join the DHT ring (FSMappedDHTRange variant): pick a candidate hash
    # range and either take it directly or request a split from its owner.
    if self.status == DS_DESTROYING:
        return
    logger.info('Starting as DHT member')
    self.status = DS_INITIALIZE

    dht_range = self.get_dht_range()

    curr_start = dht_range.get_start()
    curr_end = dht_range.get_end()

    if len(self.__split_requests_cache) == 1:
        #after first fail try init with last range
        # NOTE(review): get_last_range() may return None here, which would
        # fail on is_max_range() below - confirm it is always set
        dht_range = dht_range.get_last_range()

    if dht_range.is_max_range() or self.__split_requests_cache:
        new_range = self.__get_next_max_range()
    else:
        new_range = self.__get_next_range_near(curr_start, curr_end)

    if new_range is None:
        # no suitable range found - resync the ranges table and retry,
        # bounded by DHT_CYCLE_TRY_COUNT attempts
        if self.__start_dht_try_count == int(Config.DHT_CYCLE_TRY_COUNT):
            logger.error('Cant initialize node as a part of DHT')
            self.__start_dht_try_count = 0
            return

        logger.info('No ready range for me on network... So try sync ranges tables')
        self.__start_dht_try_count += 1
        self.__split_requests_cache = []
        self.check_range_table()
        return

    if (new_range.start == curr_start and new_range.end == curr_end):
        new_dht_range = dht_range
    else:
        new_dht_range = FSMappedDHTRange(long(new_range.start), long(new_range.end), self.save_path)
        self.update_dht_range(new_dht_range)

    if new_range.node_address == self.self_address:
        # the selected range is already assigned to this node - take it
        self._take_range(new_range)
        self.set_status_to_normalwork()
        return

    self.__split_requests_cache.append(new_range.node_address)

    logger.info('Call SplitRangeRequest [%040x-%040x] to %s'% \
            (new_dht_range.get_start(), new_dht_range.get_end(), new_range.node_address,))
    parameters = { 'start_key': new_dht_range.get_start(), 'end_key': new_dht_range.get_end() }
    req = FabnetPacketRequest(method='SplitRangeRequest', sender=self.self_address, parameters=parameters)
    self.call_node(new_range.node_address, req)
def send_subrange_data(self, node_address): dht_range = self.get_dht_range() subranges = dht_range.get_subranges() if not subranges: raise Exception('Range is not splitted!') ret_range, new_range = subranges try: self.__monitor_dht_ranges.force() self.update_dht_range(new_range) self.set_status_to_normalwork(save_range=True) except Exception, err: logger.error('send_subrange_data error: %s'%err) dht_range.join_subranges() raise err
def __process_data_block(self, key, raw_data, is_replica=False):
    # Validate a local data block header: for a master block the key must
    # equal the primary key; for a replica block the key must be one of
    # the keys derived from the primary key. Invalid blocks are counted
    # and skipped.
    # NOTE(review): further processing of valid blocks presumably follows
    # below this chunk; confirm.
    self.__processed_local_blocks += 1
    try:
        raw_header = raw_data.read(DataBlockHeader.HEADER_LEN)
        primary_key, replica_count, checksum, user_id, stored_dt = DataBlockHeader.unpack(raw_header)
        if not is_replica:
            if key != primary_key:
                raise Exception("Primary key is invalid: %s != %s" % (key, primary_key))

        data_keys = KeyUtils.get_all_keys(primary_key, replica_count)
        if is_replica:
            if key not in data_keys:
                raise Exception("Replica key is invalid: %s" % key)
    except Exception, err:
        self.__invalid_local_blocks += 1
        logger.error("[RepairDataBlocks] %s" % err)
        return
def run(self): logger.info('Check neighbours thread is started!') proc_dt = timedelta(0) while not self.stopped.is_set(): try: t0 = datetime.now() self.operator.check_neighbours() proc_dt = datetime.now() - t0 logger.debug('CheckNeighbours process time: %s'%proc_dt) except Exception, err: logger.write = logger.debug traceback.print_exc(file=logger) logger.error('[CheckNeighboursThread] %s'%err) finally:
def callback(self, packet, sender=None):
    """Handle a SplitRangeRequest response.

    @param packet - object of FabnetPacketResponse class
    @param sender - address of sender node.
    If sender == None then current node is operation initiator
    @return object of FabnetPacketResponse
            that should be resended to current node requestor
            or None for disabling packet resending
    """
    if packet.ret_code == RC_OK:
        # split succeeded - accept the foreign subrange of the reported size
        size = int(packet.ret_parameters['range_size'])
        self.operator.accept_foreign_subrange(packet.from_node, size)
        return

    # split failed - log it and retry DHT initialization with another range
    logger.error('Cant split range from %s. Details: %s'%(sender, packet.ret_message))
    logger.info('SplitRangeRequest failed! Trying select other hash range...')
    self.operator.start_as_dht_member()
def _pull_subrange(self, dht_range):
    # Split off PULL_SUBRANGE_SIZE_PERC percent of the local range
    # (alternating between the start side and the end side on successive
    # calls) and ask the adjacent range owner to pull it via
    # PullSubrangeRequest. Returns False on any failure.
    split_part = int((dht_range.length() * float(Config.PULL_SUBRANGE_SIZE_PERC)) / 100)
    if self.__last_is_start_part:
        # give the head of the range to the left neighbour
        dest_key = dht_range.get_start() - 1
        start_subrange = dht_range.get_start()
        end_subrange = split_part + dht_range.get_start()
    else:
        # give the tail of the range to the right neighbour
        dest_key = dht_range.get_end() + 1
        start_subrange = dht_range.get_end() - split_part
        end_subrange = dht_range.get_end()
    self.__last_is_start_part = not self.__last_is_start_part

    if dest_key < MIN_KEY:
        logger.info('[_pull_subrange] no range at left...')
        return False
    if dest_key > MAX_KEY:
        logger.info('[_pull_subrange] no range at right...')
        return False

    k_range = self.operator.ranges_table.find(dest_key)
    if not k_range:
        logger.error('[_pull_subrange] No range found for key=%s in ranges table'%dest_key)
        return False

    pull_subrange, new_dht_range = dht_range.split_range(start_subrange, end_subrange)
    subrange_size = pull_subrange.get_data_size()
    try:
        logger.info('Call PullSubrangeRequest [%040x-%040x] to %s'%(pull_subrange.get_start(), pull_subrange.get_end(), k_range.node_address))
        parameters = { 'start_key': pull_subrange.get_start(), 'end_key': pull_subrange.get_end(), 'subrange_size': subrange_size }
        req = FabnetPacketRequest(method='PullSubrangeRequest', sender=self.operator.self_address, parameters=parameters, sync=True)
        resp = self.operator.call_node(k_range.node_address, req)
        if resp.ret_code != RC_OK:
            raise Exception(resp.ret_message)

        new_dht_range.save_range()
        self.operator.update_dht_range(new_dht_range)
        self.__changed_range = True
    except Exception, err:
        # roll the split back on any failure
        logger.error('PullSubrangeRequest operation failed on node %s. Details: %s'%(k_range.node_address, err))
        dht_range.join_subranges()
        return False
def before_resend(self, packet):
    """Prepare a SetAuthKey packet for resending to neighbours.

    @params packet - object of FabnetPacketRequest class
    @return object of FabnetPacketRequest class
            or None for disabling packet resend to neigbours
    """
    auth_key = packet.parameters.get('auth_key', None)
    if not auth_key:
        # no key supplied - generate one for the whole network
        packet.parameters['auth_key'] = self.operator.generate_auth_key()
        return packet

    auth_key = str(auth_key)
    if len(auth_key) < 16:
        # reject weak keys and stop the resend
        logger.error('SetAuthKey: too short auth_key!')
        return None

    packet.parameters['auth_key'] = auth_key
    return packet
def _put_data(self, key, path, dbct):
    # Move the local data block at `path` to the node responsible for
    # `key` according to the ranges table. Returns True when the block was
    # accepted by the remote node, False otherwise.
    k_range = self.operator.ranges_table.find(long(key, 16))
    if not k_range:
        logger.debug('No range found for reservation key %s'%key)
        return False

    tmp = None
    if os.path.isdir(path):
        # directory block - pack it into a temporary zip archive first.
        # NOTE(review): tmp.name and path are interpolated into a shell
        # command - unsafe for untrusted paths; confirm inputs are trusted.
        tmp = tempfile.NamedTemporaryFile(suffix='.zip')
        os.system('rm -f %s && cd %s && zip -r %s *'%(tmp.name, path, tmp.name))
        path = tmp.name

    try:
        db = ThreadSafeDataBlock(path)
        if not db.try_block_for_read():
            # block is busy - skip it this iteration
            logger.info('DB %s is locked. skip it...'%path)
            return False

        if k_range.node_address in self.__full_nodes:
            # destination already reported no free space during this run
            logger.info('Node %s does not have free space. Skipping put data block...'%k_range.node_address)
            return False

        if k_range.node_address == self.operator.self_address:
            logger.info('Skip moving to local node')
            return False

        params = {'key': key, 'dbct': dbct, 'init_block': False, 'carefully_save': True}
        req = FabnetPacketRequest(method='PutDataBlock', sender=self.operator.self_address, \
                parameters=params, binary_data=ThreadSafeDataBlock(path), sync=True)
        resp = self.operator.call_node(k_range.node_address, req)
    finally:
        # the temporary archive is removed on close
        if tmp:
            tmp.close()

    if resp.ret_code == RC_NO_FREE_SPACE:
        # remember full nodes to avoid repeated attempts
        self.__full_nodes.append(k_range.node_address)
        return False
    if resp.ret_code not in (RC_OK, RC_OLD_DATA):
        logger.error('PutDataBlock error on %s: %s'%(k_range.node_address, resp.ret_message))
        return False
    return True
def callback(self, packet, sender=None):
    """Handle a discovery response and continue the discovery process.

    @param packet - object of FabnetPacketResponse class
    @param sender - address of sender node.
    If sender == None then current node is operation initiator
    @return object of FabnetPacketResponse
            that should be resended to current node requestor
            or None for disabling packet resending
    """
    if packet.ret_code != RC_OK:
        logger.error('No discovery response from neighbour.. It makes me sad panda :(')
        return

    ret = packet.ret_parameters
    self.operator.start_discovery_process(ret['node'],
            ret.get('uppers', []), ret.get('superiors', []))
def process(self, socket_processor):
    # Receive one management request, process it synchronously and send
    # the response; on failure, best-effort report the error to the client.
    try:
        packet = socket_processor.recv_packet()
        if not (packet.is_request and packet.sync):
            raise Exception('Async operations for management agent does not supported!')

        ret_code, ret_msg = self.process_operation(packet)
        ret_packet = FabnetPacketResponse(ret_code=ret_code, ret_message=ret_msg)
        socket_processor.send_packet(ret_packet)
        socket_processor.close_socket(force=True)
    except Exception, err:
        # NOTE(review): ret_message is built but never used - the error
        # packet below uses str(err) instead; confirm which is intended
        ret_message = 'MgmtCommandsProcessor.process() error: %s' % err
        # route the traceback output through the logger object
        logger.write = logger.info
        traceback.print_exc(file=logger)
        try:
            if not socket_processor.is_closed():
                err_packet = FabnetPacketResponse(ret_code=RC_ERROR, ret_message=str(err))
                socket_processor.send_packet(err_packet)
        except Exception, err:
            logger.error("Can't send error message to socket: %s"%err)
def run(self):
    # DHT maintenance loop: flushes the metadata cache every
    # FLUSH_MD_CACHE_TIMEOUT seconds and keeps the hash ranges table in
    # sync, waiting for neighbours while the table is not ready.
    logger.info('Thread started!')
    t0 = datetime.now()
    while not self.stopped.is_set():
        dt = datetime.now() - t0
        if dt.total_seconds() > float(Config.FLUSH_MD_CACHE_TIMEOUT):
            self.operator.flush_md_cache()
            t0 = datetime.now()

        try:
            if not self.operator.check_range_table():
                # table not synchronized yet - wait for neighbours and retry
                logger.info('Waiting neighbours...')
                time.sleep(float(Config.INIT_DHT_WAIT_NEIGHBOUR_TIMEOUT))
                continue
        except Exception, err:
            logger.error(str(err))

        # interruptible sleep for CHECK_HASH_TABLE_TIMEOUT seconds
        for i in xrange(int(Config.CHECK_HASH_TABLE_TIMEOUT)):
            if self.stopped.is_set():
                break
            time.sleep(1)
def worker_routine(self, item):
    # Deliver one queued async packet to a remote node. If delivery fails
    # for a request packet, synthesize an RC_DONT_STARTED response and
    # send it to this node so the caller's callback still fires.
    if len(item) != 2:
        raise Exception('Expected (<address>,<packet>), but "%s" occured'%item)
    address, packet = item

    rcode, rmsg = self.fri_client.call(address, packet)
    if rcode == RC_OK:
        return

    logger.error("Can't call async operation %s on %s. Details: %s"%\
            (getattr(packet, 'method', 'callback'), address, rmsg))
    logger.debug('Failed packet: %s'%packet)

    if packet.is_response:
        # failed responses are not re-reported
        return

    ret_packet = FabnetPacketResponse(message_id=packet.message_id, \
            from_node=address, ret_code=RC_DONT_STARTED, ret_message=rmsg)
    if not self.operator.is_stopped():
        rcode, rmsg = self.fri_client.call(self.self_address, ret_packet)
        if rcode == RC_OK:
            return
        logger.error("Can't send error response to self node")
def __upgrade_node(self, origin_url): f_upgrage_log = None old_curdir = os.path.abspath(os.curdir) try: if not origin_url: raise Exception('origin_url does not found') f_upgrage_log = open(os.path.join(self.home_dir, 'upgrade_node.log'), 'a') f_upgrage_log.write('='*80+'\n') f_upgrage_log.write('UPGRADE FROM %s ... NOW = %s\n'%(origin_url, datetime.now())) f_upgrage_log.write('='*80+'\n') os.chdir(GIT_HOME) os.system('git checkout -- .') #clear local changes... os.system('git config --local --replace-all remote.origin.url %s'%origin_url) ret, cout, cerr = run_command_ex(['git', 'pull']) f_upgrage_log.write('===> git pull finished with code %s\n'%ret) f_upgrage_log.write('===> stdout: \n%s'%cout) f_upgrage_log.write('===> stderr: \n%s'%cerr) if ret != 0: raise Exception('git pull failed: %s'%cerr) optype = self.operator.get_type() ret, cout, cerr = run_command_ex(['./fabnet/bin/upgrade-node', optype]) f_upgrage_log.write('===> ./fabnet/bin/upgrade-node %s finished with code %s\n'%(optype, ret)) f_upgrage_log.write('===> stdout: \n%s'%cout) f_upgrage_log.write('===> stderr: \n%s'%cerr) if ret != 0: raise Exception('upgrade-node script failed!') self.update_node_info() f_upgrage_log.write('Node is upgraded successfully!\n\n') except Exception, err: self._throw_event(ET_ALERT, 'UpgradeNodeOperation failed', err) logger.error('[UpgradeNodeOperation] %s'%err)