def _do_request_list_files(self, suppliers_list):
    """
    Send a ListFiles() request to every supplier in `suppliers_list`.

    Registers `self._on_list_files_response` as the list-files query callback
    for `self.queue_alias`, recomputes `self.correctable_errors` from the
    number of positions in `suppliers_list`, and records the request status
    per position in `self.requested_list_files`:

      * `None`  - a packet was sent out for that position,
      * `False` - the position is empty or sending produced no packet.

    Finally schedules `self._on_request_list_files_timeout` to run in 30 seconds
    and keeps the timer in `self.request_list_files_timer`.

    :param suppliers_list: supplier IDURLs ordered by position; falsy entries
        are treated as empty positions and skipped.
    """

    def _failure_callbacks(position):
        # Build the callbacks through a factory so every supplier gets its own
        # `position`. Lambdas created directly inside the loop would all share
        # the loop variable and, when fired after the loop has advanced,
        # report the position of the last supplier instead of their own.
        return {
            commands.Fail(): lambda resp, info: self._on_list_files_failed(position),
            None: lambda pkt_out: self._on_list_files_failed(position),
        }

    backup_matrix.add_list_files_query_callback(
        customer_idurl=self.queue_owner_idurl,
        query_path=self.queue_alias,
        callback_method=self._on_list_files_response,
    )
    self.correctable_errors = eccmap.GetCorrectableErrors(len(suppliers_list))
    for supplier_pos, supplier_idurl in enumerate(suppliers_list):
        if not supplier_idurl:
            # empty position: nothing to request
            self.requested_list_files[supplier_pos] = False
            continue
        outpacket = p2p_service.SendListFiles(
            target_supplier=supplier_idurl,
            key_id=self.group_key_id,
            query_items=[
                self.queue_alias,
            ],
            timeout=30,
            callbacks=_failure_callbacks(supplier_pos),
        )
        # None means "request is pending", False means "request was not sent"
        self.requested_list_files[supplier_pos] = None if outpacket else False
    if _Debug:
        lg.args(_DebugLevel, requested=self.requested_list_files)
    self.request_list_files_timer = reactor.callLater(30, self._on_request_list_files_timeout)  # @UndefinedVariable
def _do_request(self, x=None):
    """
    Send ListFiles() to every known supplier of the target customer that is online.

    Resets `self.received_lf_counter` and `self.requested_lf_packet_ids`, then
    sets `self.critical_suppliers_number` from `eccmap.GetCorrectableErrors()`.
    When that lookup fails for the current number of suppliers, 75% of the
    suppliers count (rounded down) is used instead.

    The packet ID of every request that was sent out is added to
    `self.requested_lf_packet_ids`.

    :param x: not used by this method.
    """
    from raid import eccmap
    self.received_lf_counter = 0
    self.requested_lf_packet_ids.clear()
    known_suppliers = contactsdb.suppliers(customer_idurl=self.target_customer_idurl)
    try:
        self.critical_suppliers_number = eccmap.GetCorrectableErrors(len(known_suppliers))
    except Exception:
        # Catch only regular errors here: a bare `except:` would also swallow
        # KeyboardInterrupt / SystemExit and hide a shutdown request.
        lg.warn('number of known suppliers for customer %r is not standard' % self.target_customer_idurl)
        self.critical_suppliers_number = int(float(len(known_suppliers)) * 0.75)
    for idurl in known_suppliers:
        if not idurl:
            # empty supplier position
            continue
        if not online_status.isOnline(idurl):
            lg.warn('skip sending ListFiles() because %s is not online' % idurl)
            continue
        if _Debug:
            lg.out(_DebugLevel, 'list_files_orator._do_request  ListFiles() from my supplier %s' % idurl)
        outpacket = p2p_service.SendListFiles(
            target_supplier=idurl,
            customer_idurl=self.target_customer_idurl,
            timeout=30,
        )
        if outpacket:
            self.requested_lf_packet_ids.add(outpacket.PacketID)
        else:
            lg.err('failed sending ListFiles() to %r' % idurl)
def doInit(self, *args, **kwargs):
    """
    Action method.

    Starts block rebuilding, stores the non-empty suppliers of the customer in
    `self.known_suppliers` and makes sure `self.EccMap` is set before
    `self.max_errors` is computed from it.

    When `self.EccMap` is not set yet it is resolved from the first source
    that provides a value:

      1. the local current ECC map, if the customer is the local identity,
      2. the 'ecc_map' value most often found in suppliers meta info,
      3. the ECC map known by the active share for `self.key_id`
         (only when 'service_shared_data' is on),
      4. a guess based on the number of known suppliers, falling back to the
         default desired suppliers number when that count is not a possible one.

    Also subscribes `self._on_data_receiver_state_changed` to state changes
    of `data_receiver.A()` when that instance exists.
    """
    self._do_block_rebuilding()
    all_positions = contactsdb.suppliers(customer_idurl=self.customer_idurl)
    self.known_suppliers = [idurl for idurl in all_positions if idurl]

    if not self.EccMap and self.customer_idurl == my_id.getIDURL():
        self.EccMap = eccmap.Current()
        lg.info('ECC map %r set from local for my own suppliers' % self.EccMap)

    if not self.EccMap:
        # count how many suppliers reported each ECC map name
        votes = {}
        for supplier_idurl in self.known_suppliers:
            meta_info = contactsdb.get_supplier_meta_info(
                supplier_idurl=supplier_idurl,
                customer_idurl=self.customer_idurl,
            )
            reported = meta_info.get('ecc_map', None)
            if reported:
                votes[reported] = votes.get(reported, 0) + 1

        if votes:
            # most frequently reported name wins
            ranked = sorted(votes.items(), key=lambda item: item[1], reverse=True)
            self.EccMap = eccmap.eccmap(ranked[0][0])
            lg.info('ECC map %r recognized from suppliers meta info' % self.EccMap)
        else:
            share_ecc_map = None
            if driver.is_on('service_shared_data'):
                from access import shared_access_coordinator
                active_share = shared_access_coordinator.get_active_share(self.key_id)
                if active_share:
                    share_ecc_map = active_share.known_ecc_map

            if share_ecc_map:
                self.EccMap = eccmap.eccmap(share_ecc_map)
                lg.info('ECC map %r recognized from active share %r' % (self.EccMap, active_share))
            else:
                # last resort: derive the map from the suppliers count
                suppliers_count = len(self.known_suppliers)
                if suppliers_count not in eccmap.GetPossibleSuppliersCount():
                    suppliers_count = settings.DefaultDesiredSuppliers()
                self.EccMap = eccmap.eccmap(eccmap.GetEccMapName(suppliers_count))
                lg.warn('no meta info found, guessed ECC map %r from %d known suppliers' % (self.EccMap, len(self.known_suppliers)))

    self.max_errors = eccmap.GetCorrectableErrors(self.EccMap.NumSuppliers())
    if data_receiver.A():
        data_receiver.A().addStateChangedCallback(self._on_data_receiver_state_changed)
def isStillCorrectable(self, arg):
    """
    Condition method.

    Returns True while the number of entries in `self.RequestFails` does not
    exceed the value of `eccmap.GetCorrectableErrors()` for the number of
    suppliers of `self.EccMap`.
    """
    allowed_fails = eccmap.GetCorrectableErrors(self.EccMap.NumSuppliers())
    still_correctable = len(self.RequestFails) <= allowed_fails
    if _Debug:
        report = 'restore_worker.isStillCorrectable max_errors=%d, fails=%d' % (allowed_fails, len(self.RequestFails))
        lg.out(_DebugLevel, report)
    return still_correctable
def isEnoughListFilesReceived(self, *args, **kwargs):
    """
    Condition method.

    Returns True when the module-level `_ReceivedListFilesCounter` has reached
    the value of `eccmap.GetCorrectableErrors()` for the suppliers number of
    the current ECC map.
    """
    message = 'list_files_orator.isSomeListFilesReceived %d list files was received' % _ReceivedListFilesCounter
    lg.out(6, message)
    from raid import eccmap
    current_suppliers_number = eccmap.Current().suppliers_number
    threshold = eccmap.GetCorrectableErrors(current_suppliers_number)
    return _ReceivedListFilesCounter >= threshold
def _on_read_queue_owner_suppliers_success(self, dht_value):
    """
    Handle the DHT record with suppliers of the queue owner.

    When `dht_value` is a dict with a non-empty 'suppliers' list, stores that
    list in `self.suppliers_list`, the 'ecc_map' value in `self.ecc_map` and
    recomputes `self.correctable_errors` from the number of suppliers.

    Fires 'dht-read-failed' when `self.suppliers_list` or `self.ecc_map` is
    empty after that, otherwise fires 'dht-read-success'.

    :param dht_value: value read from DHT; expected to be a dict with
        'suppliers' and 'ecc_map' keys.
    :return: always None.
    """
    # TODO: add more validations of dht_value
    if dht_value and isinstance(dht_value, dict) and len(dht_value.get('suppliers', [])) > 0:
        self.suppliers_list = dht_value['suppliers']
        # A record without 'ecc_map' must end up as 'dht-read-failed' below
        # rather than raise KeyError out of this callback.
        self.ecc_map = dht_value.get('ecc_map')
        self.correctable_errors = eccmap.GetCorrectableErrors(len(self.suppliers_list))
    if _Debug:
        lg.args(_DebugLevel, suppliers_list=self.suppliers_list, ecc_map=self.ecc_map)
    if not self.suppliers_list or not self.ecc_map:
        self.automat('dht-read-failed', None)
        return None
    self.automat('dht-read-success')
    return None
def doCheckAllConnected(self, *args, **kwargs):
    """
    Action method.

    Counts suppliers from `self.known_suppliers_list` whose IDURL is cached
    and whose supplier connector is in 'CONNECTED' state, and fires
    'all-suppliers-connected' when that count reaches the required minimum.

    The minimum is 1 when `self.known_ecc_map` is not set, otherwise it is the
    value of `eccmap.GetCorrectableErrors()` for the suppliers number of that
    ECC map.
    """
    connected_count = 0
    for idurl in self.known_suppliers_list:
        if id_url.is_cached(idurl):
            connector = supplier_connector.by_idurl(idurl, customer_idurl=self.customer_idurl)
            if connector is not None and connector.state == 'CONNECTED':
                connected_count += 1

    if self.known_ecc_map:
        from raid import eccmap
        suppliers_number = eccmap.GetEccMapSuppliersNumber(self.known_ecc_map)
        required_count = eccmap.GetCorrectableErrors(suppliers_number)
    else:
        required_count = 1

    if connected_count >= required_count:
        self.automat('all-suppliers-connected')
def _do_check_supplier_connectors(self):
    """
    Fire 'all-suppliers-connected' when enough supplier connectors are connected.

    A supplier from `self.known_suppliers_list` is counted when its IDURL is
    cached and its supplier connector exists and is in 'CONNECTED' state.
    The required minimum is 1 when `self.known_ecc_map` is not set, otherwise
    it is the value of `eccmap.GetCorrectableErrors()` for the suppliers
    number of that ECC map.
    """
    connected_count = 0
    for idurl in self.known_suppliers_list:
        if id_url.is_cached(idurl):
            connector = supplier_connector.by_idurl(idurl, customer_idurl=self.customer_idurl)
            if connector is not None and connector.state == 'CONNECTED':
                connected_count += 1

    if self.known_ecc_map:
        from raid import eccmap
        suppliers_number = eccmap.GetEccMapSuppliersNumber(self.known_ecc_map)
        required_count = eccmap.GetCorrectableErrors(suppliers_number)
    else:
        required_count = 1

    if _Debug:
        lg.args(_DebugLevel, connected_count=connected_count, critical_suppliers_number=required_count)
    if connected_count >= required_count:
        self.automat('all-suppliers-connected')
def doDecideToDismiss(self, arg):
    """
    Action method.

    Decides which suppliers (if any) should be dismissed and reports the
    decision by firing the 'made-decision' event with a list of IDURLs.
    An empty list means "do not dismiss anyone now".

    The checks are applied in this order, the first one that matches wins:

      1. suppliers queued in the module-level `_SuppliersToFire` list are
         all reported (the list is emptied),
      2. an empty entry in `contactsdb.suppliers()` -> empty decision,
      3. no connected or no online suppliers -> empty decision,
      4. some supplier status is still being checked -> empty decision,
      5. suppliers at positions beyond the desired number are all reported,
      6. no disconnected suppliers -> empty decision,
      7. offline + online count differs from desired number -> empty decision,
      8. more offline suppliers than `eccmap.GetCorrectableErrors()` allows
         -> empty decision,
      9. otherwise a single supplier from the "potentially fired" set is
         reported, or an empty decision when that set is empty.

    :param arg: not used by this method.
    """
    global _SuppliersToFire
    # external requests have priority: de-duplicate and consume the queue
    to_be_fired = list(set(_SuppliersToFire))
    _SuppliersToFire = []
    if to_be_fired:
        lg.warn('going to fire %d suppliers from external request' % len(to_be_fired))
        self.automat('made-decision', to_be_fired)
        return
    potentialy_fired = set()
    connected_suppliers = set()
    disconnected_suppliers = set()
    requested_suppliers = set()
    online_suppliers = set()
    offline_suppliers = set()
    redundant_suppliers = set()
    # if you have some empty suppliers need to get rid of them,
    # but no need to dismiss anyone at the moment.
    if '' in contactsdb.suppliers() or None in contactsdb.suppliers():
        lg.warn('SKIP, found empty supplier')
        self.automat('made-decision', [])
        return
    number_desired = settings.getSuppliersNumberDesired()
    # classify every supplier twice: by supplier connector state
    # and by contact status
    for supplier_idurl in contactsdb.suppliers():
        sc = supplier_connector.by_idurl(supplier_idurl)
        if not sc:
            lg.warn('SKIP, supplier connector for supplier %s not exist' % supplier_idurl)
            continue
        if sc.state == 'NO_SERVICE':
            # the only connector state that marks a supplier for dismissal here
            lg.warn('found "NO_SERVICE" supplier: %s' % supplier_idurl)
            disconnected_suppliers.add(supplier_idurl)
            potentialy_fired.add(supplier_idurl)
        elif sc.state == 'CONNECTED':
            connected_suppliers.add(supplier_idurl)
        elif sc.state in [
            'DISCONNECTED',
            'REFUSE',
        ]:
            disconnected_suppliers.add(supplier_idurl)
        # elif sc.state in ['QUEUE?', 'REQUEST', ]:
        #     requested_suppliers.add(supplier_idurl)
        if contact_status.isOffline(supplier_idurl):
            offline_suppliers.add(supplier_idurl)
        elif contact_status.isOnline(supplier_idurl):
            online_suppliers.add(supplier_idurl)
        elif contact_status.isCheckingNow(supplier_idurl):
            requested_suppliers.add(supplier_idurl)
    # suppliers stored at positions beyond the desired number are redundant
    if contactsdb.num_suppliers() > number_desired:
        for supplier_index in range(number_desired, contactsdb.num_suppliers()):
            idurl = contactsdb.supplier(supplier_index)
            if idurl:
                lg.warn('found "REDUNDANT" supplier %s at position %d' % (
                    idurl,
                    supplier_index,
                ))
                potentialy_fired.add(idurl)
                redundant_suppliers.add(idurl)
            else:
                lg.warn('supplier at position %d not exist' % supplier_index)
    if not connected_suppliers or not online_suppliers:
        lg.warn('SKIP, no ONLINE suppliers found at the moment')
        self.automat('made-decision', [])
        return
    if requested_suppliers:
        lg.warn('SKIP, still waiting response from some of suppliers')
        self.automat('made-decision', [])
        return
    if redundant_suppliers:
        # all redundant suppliers are reported at once
        result = list(redundant_suppliers)
        lg.info('will replace redundant suppliers: %s' % result)
        self.automat('made-decision', result)
        return
    if not disconnected_suppliers:
        lg.warn('SKIP, no OFFLINE suppliers found at the moment')
        # TODO: add more conditions to fire "slow" suppliers
        self.automat('made-decision', [])
        return
    if len(offline_suppliers) + len(online_suppliers) != number_desired:
        lg.warn('SKIP, offline + online != total count: %s %s %s' % (offline_suppliers, online_suppliers, number_desired))
        self.automat('made-decision', [])
        return
    from raid import eccmap
    max_offline_suppliers_count = eccmap.GetCorrectableErrors(number_desired)
    if len(offline_suppliers) > max_offline_suppliers_count:
        lg.warn('SKIP, too many OFFLINE suppliers at the moment : %d > %d' % (
            len(offline_suppliers),
            max_offline_suppliers_count,
        ))
        self.automat('made-decision', [])
        return
    critical_offline_suppliers_count = eccmap.GetFireHireErrors(number_desired)
    # TODO: temporary disabled because of an issue: too aggressive replacing suppliers who still have the data
    # NOTE: the branch below never runs; `critical_offline_suppliers_count` is only used inside it
    if False:  # len(offline_suppliers) >= critical_offline_suppliers_count:
        one_dead_supplier = offline_suppliers.pop()
        lg.warn('found "CRITICALLY_OFFLINE" supplier %s, max offline limit is %d' % (
            one_dead_supplier,
            critical_offline_suppliers_count,
        ))
        potentialy_fired.add(one_dead_supplier)
    if not potentialy_fired:
        lg.out(6, 'fire_hire.doDecideToDismiss found no "bad" suppliers, all is good !!!!!')
        self.automat('made-decision', [])
        return
    # only replace suppliers one by one at the moment
    # (the list is built from a set, so which supplier comes first is not specified)
    result = list(potentialy_fired)
    lg.info('will replace supplier %s' % result[0])
    self.automat('made-decision', [
        result[0],
    ])
def doDecideToDismiss(self, *args, **kwargs):
    """
    Action method.

    Decides which suppliers (if any) should be dismissed and reports the
    decision by firing the 'made-decision' event with a list of IDURLs.
    An empty list means "do not dismiss anyone now".

    The checks are applied in this order, the first one that matches wins:

      1. `p2p_connector` or `network_connector` is missing or not in
         'CONNECTED' state -> empty decision,
      2. suppliers queued in the module-level `_SuppliersToFire` list are
         all reported (the list is emptied),
      3. suppliers at positions beyond the desired number are all reported,
      4. an empty entry in my suppliers list -> empty decision,
      5. no connected or no online suppliers -> empty decision,
      6. some supplier status is still being checked -> empty decision,
      7. no disconnected suppliers -> empty decision,
      8. offline + online count differs from desired number -> empty decision,
      9. more offline suppliers than `eccmap.GetCorrectableErrors()` allows
         -> empty decision,
     10. otherwise a single supplier from the "potentially fired" set is
         reported, or an empty decision when that set is empty.

    A supplier gets into the "potentially fired" set when its connector is in
    'NO_SERVICE' state, or when it is offline, the number of offline suppliers
    reached `eccmap.GetFireHireErrors()` and the
    'services/employer/replace-critically-offline-enabled' option is enabled.
    """
    global _SuppliersToFire
    from p2p import p2p_connector
    from p2p import network_connector
    from customer import supplier_connector
    from p2p import online_status
    # take any actions only if I am connected to the network
    # Each connector has its own guard so the log line names the one that is
    # actually missing. Testing both in the first guard would make the second
    # guard unreachable and report the wrong connector.
    if not p2p_connector.A():
        if _Debug:
            lg.out(_DebugLevel, 'fire_hire.doDecideToDismiss    p2p_connector() is not ready yet, SKIP')
        self.automat('made-decision', [])
        return
    if not network_connector.A():
        if _Debug:
            lg.out(_DebugLevel, 'fire_hire.doDecideToDismiss    network_connector() is not ready yet, SKIP')
        self.automat('made-decision', [])
        return
    if p2p_connector.A().state != 'CONNECTED' or network_connector.A().state != 'CONNECTED':
        if _Debug:
            lg.out(_DebugLevel, 'fire_hire.doDecideToDismiss    p2p/network is not connected at the moment, SKIP')
        self.automat('made-decision', [])
        return
    # if certain suppliers needs to be removed by manual/external request just do that
    to_be_fired = id_url.to_list(set(_SuppliersToFire))
    _SuppliersToFire = []
    if to_be_fired:
        lg.info('going to fire %d suppliers from external request' % len(to_be_fired))
        self.automat('made-decision', to_be_fired)
        return
    # make sure to not go too far when i just want to decrease number of my suppliers
    number_desired = settings.getSuppliersNumberDesired()
    redundant_suppliers = set()
    if contactsdb.num_suppliers() > number_desired:
        for supplier_index in range(number_desired, contactsdb.num_suppliers()):
            idurl = contactsdb.supplier(supplier_index)
            if idurl:
                lg.info('found REDUNDANT supplier %s at position %d' % (
                    idurl,
                    supplier_index,
                ))
                redundant_suppliers.add(idurl)
    if redundant_suppliers:
        # all redundant suppliers are reported at once
        result = list(redundant_suppliers)
        lg.info('will replace redundant suppliers: %s' % result)
        self.automat('made-decision', result)
        return
    # now I need to look more careful at my suppliers
    potentialy_fired = set()
    connected_suppliers = set()
    disconnected_suppliers = set()
    requested_suppliers = set()
    online_suppliers = set()
    offline_suppliers = set()
    # if you have some empty suppliers need to get rid of them,
    # but no need to dismiss anyone at the moment.
    my_suppliers = contactsdb.suppliers()
    if _Debug:
        lg.args(_DebugLevel, my_suppliers=my_suppliers)
    if id_url.is_some_empty(my_suppliers):
        lg.warn('SKIP, found empty supplier')
        self.automat('made-decision', [])
        return
    # classify every supplier twice: by supplier connector state
    # and by online status
    for supplier_idurl in my_suppliers:
        sc = supplier_connector.by_idurl(supplier_idurl)
        if not sc:
            lg.warn('SKIP, supplier connector for supplier %s not exist' % supplier_idurl)
            continue
        if sc.state == 'NO_SERVICE':
            lg.warn('found "NO_SERVICE" supplier: %s' % supplier_idurl)
            disconnected_suppliers.add(supplier_idurl)
            potentialy_fired.add(supplier_idurl)
        elif sc.state == 'CONNECTED':
            connected_suppliers.add(supplier_idurl)
        elif sc.state in [
            'DISCONNECTED',
            'REFUSE',
        ]:
            disconnected_suppliers.add(supplier_idurl)
        # elif sc.state in ['QUEUE?', 'REQUEST', ]:
        #     requested_suppliers.add(supplier_idurl)
        if online_status.isOffline(supplier_idurl):
            offline_suppliers.add(supplier_idurl)
        elif online_status.isOnline(supplier_idurl):
            online_suppliers.add(supplier_idurl)
        elif online_status.isCheckingNow(supplier_idurl):
            requested_suppliers.add(supplier_idurl)
    if not connected_suppliers or not online_suppliers:
        lg.warn('SKIP, no ONLINE suppliers found at the moment')
        self.automat('made-decision', [])
        return
    if requested_suppliers:
        lg.warn('SKIP, still waiting response from some of suppliers')
        self.automat('made-decision', [])
        return
    if not disconnected_suppliers:
        if _Debug:
            lg.out(_DebugLevel, 'fire_hire.doDecideToDismiss    SKIP, no OFFLINE suppliers found at the moment')
        # TODO: add more conditions to fire "slow" suppliers - they are still connected but useless
        self.automat('made-decision', [])
        return
    if len(offline_suppliers) + len(online_suppliers) != number_desired:
        lg.warn('SKIP, offline + online != total count: %s %s %s' % (offline_suppliers, online_suppliers, number_desired))
        self.automat('made-decision', [])
        return
    max_offline_suppliers_count = eccmap.GetCorrectableErrors(number_desired)
    if len(offline_suppliers) > max_offline_suppliers_count:
        lg.warn('SKIP, too many OFFLINE suppliers at the moment : %d > %d' % (
            len(offline_suppliers),
            max_offline_suppliers_count,
        ))
        self.automat('made-decision', [])
        return
    critical_offline_suppliers_count = eccmap.GetFireHireErrors(number_desired)
    if len(offline_suppliers) >= critical_offline_suppliers_count and len(offline_suppliers) > 0:
        if config.conf().getBool('services/employer/replace-critically-offline-enabled'):
            # TODO: check that issue
            # too aggressive replacing suppliers who still have the data is very dangerous !!!
            one_dead_supplier = offline_suppliers.pop()
            lg.warn('found "CRITICALLY_OFFLINE" supplier %s, max offline limit is %d' % (
                one_dead_supplier,
                critical_offline_suppliers_count,
            ))
            potentialy_fired.add(one_dead_supplier)
    if not potentialy_fired:
        if _Debug:
            lg.out(_DebugLevel, 'fire_hire.doDecideToDismiss   found no "bad" suppliers, all is good !!!!!')
        self.automat('made-decision', [])
        return
    # only replace suppliers one by one at the moment
    # (the list is built from a set, so which supplier comes first is not specified)
    result = list(potentialy_fired)
    lg.info('will replace supplier %s' % result[0])
    self.automat('made-decision', [
        result[0],
    ])
def isStillCorrectable(self, arg):
    """
    Condition method.

    Returns True while the number of entries in `self.RequestFails` does not
    exceed the value of `eccmap.GetCorrectableErrors()` for the number of
    suppliers of `self.EccMap`.
    """
    fails_count = len(self.RequestFails)
    allowed_fails = eccmap.GetCorrectableErrors(self.EccMap.NumSuppliers())
    return fails_count <= allowed_fails