def learning(input):
    worker = multiprocessing.cpu_count()
    if configserver.get('number_worker') > 0:
        worker = configserver.get('number_worker')

    queue_classifier = multiprocessing.Queue()
    for c in classifier.get_all_classifiers():
        queue_classifier.put_nowait(c)

    processes = []
    failed = []

    for i in range(worker):
        processes += [multiprocessing.Process(target=_learning_worker, args=(input, queue_classifier))]

    for process in processes:
        process.start()

    for process in processes:
        process.join()
        if process.exitcode != 0:
            failed += [process]

    if len(failed) > 0:
        logging.error('{} processes have failed - result might not be complete'.format(len(failed)))
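# Illustrative sketch of a worker as spawned above (the real _learning_worker is not shown in
# this excerpt): each worker drains the shared classifier queue and trains every classifier it
# receives. The learn() method name is an assumption, not the project's confirmed API.
def _learning_worker_sketch(input, queue_classifier):
    while True:
        try:
            c = queue_classifier.get(True, 1)
        except queue.Empty:
            return  # queue drained - return so the process exits with code 0
        c.learn(input)  # hypothetical training call on the classifier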
def load_classifyhub_settings():
    settings = QSettings()
    old_config = configserver.get_config()
    configserver.parse_args()

    for key in old_config.keys():
        if configserver.get(key) == old_config[key]:
            # This key wasn't changed by command-line arguments, so try to load it from the stored settings
            data = configserver.get(key)
            if isinstance(data, bool):
                configserver.set(key, bool(int(settings.value('classifyhub/{}'.format(key), old_config[key])))) 
            elif isinstance(data, int):
                configserver.set(key, int(settings.value('classifyhub/{}'.format(key), old_config[key])))
            elif isinstance(data, str):
                configserver.set(key, str(settings.value('classifyhub/{}'.format(key), old_config[key])))
        else:
            # Persist the command-line options so the user is not confused by stale stored values
            settings.setValue('classifyhub/{}'.format(key), configserver.get(key))
def batch(input):
    if len(input) == 0:
        return []

    worker = multiprocessing.cpu_count()
    if configserver.get('number_worker') > 0:
        worker = configserver.get('number_worker')

    queue_input = multiprocessing.Queue()
    queue_output = multiprocessing.Queue()
    for data in input:
        queue_input.put_nowait(data)

    processes = []
    failed = []

    for i in range(worker):
        processes += [multiprocessing.Process(target=_batch_worker, args=(queue_input, queue_output))]

    for process in processes:
        process.start()

    result = []
    alive = True

    # We have to pull all elements out of the queue or else the process might not terminate
    # See https://docs.python.org/3/library/multiprocessing.html#programming-guidelines
    while alive:
        try:
            while True:
                result += [queue_output.get(True, 1)]
        except queue.Empty:
            pass

        alive = False
        for process in processes:
            if process.is_alive():
                alive = True
                break

    for process in processes:
        process.join()
        if process.exitcode != 0:
            failed += [process]

    if len(failed) > 0:
        logging.error('{} processes have failed - result might not be complete'.format(len(failed)))

    # Try again just in case we missed some elements
    try:
        while True:
            result += [queue_output.get(True, 1)]
    except queue.Empty:
        pass

    if len(result) != len(input):
        logging.error('Expected {} results, got {} - some results are missing'.format(len(input), len(result)))

    return result
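# Illustrative sketch of a worker as spawned above (the real _batch_worker is not shown in this
# excerpt): pull inputs until the queue runs empty, classify each one and push the result to the
# output queue. classify() is a hypothetical stand-in for the project's actual classification call.
def _batch_worker_sketch(queue_input, queue_output):
    while True:
        try:
            data = queue_input.get(True, 1)
        except queue.Empty:
            return  # no more work - exit with code 0
        queue_output.put_nowait(classify(data))  # classify() is hypothetical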
def check_stale_lock(lock_path, time=20):
    worker = multiprocessing.cpu_count()
    if configserver.get('number_worker') > 0:
        worker = configserver.get('number_worker')

    lock = lockfile.LockFile(lock_path)
    test_lock = lockfile.LockFile(lock_path + '_STALE')

    while not test_lock.i_am_locking():
        try:
            try:
                # Because this is longer than the lock timeout, we can be sure that the lock is stale
                test_lock.acquire(timeout=time + time / 5 * worker + (time / 2) * random.random())
            except lockfile.LockTimeout:
                test_lock.break_lock()
        except Exception as e:
            logging.debug('Error at locking stale test lock: {}'.format(e))

    try:
        lock.acquire(timeout=time)
        lock.release()
    except lockfile.LockTimeout:
        logging.debug('Breaking lock {}'.format(lock_path))
        try:
            lock.break_lock()
        except lockfile.NotLocked:
            pass

    try:
        test_lock.release()
    except lockfile.NotLocked:
        # Someone else broke it - that is fine
        pass
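# Usage example (illustrative only, not part of the project code): clear a possibly stale lock
# first, then acquire it for the actual critical section - the same pattern the model loading
# code below uses. 'example.model' is a placeholder path.
def _check_stale_lock_usage_example():
    check_stale_lock('example.model' + '_LOCK')
    lock = lockfile.LockFile('example.model' + '_LOCK')
    with lock:
        pass  # read or write the protected file here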
def main():
    data = processor.file_to_input(configserver.get('input'))
    if len(data) == 0:
        logging.error('No input data - aborting')
        return

    logging.log(configserver.output_log_level(), 'Batch processing started')
    result = processor.batch(data)
    processor.result_to_file(result, configserver.get('output'))
    logging.log(configserver.output_log_level(), 'Batch processing finished')
def check_user_and_secret():
    github_secret = github.GithubSecret()
    if not github_secret.secret_available:
        QMessageBox.warning(
            None, 'No authentication possible',
            'Authenticated requests are not possible - this will limit you to only a few GitHub requests per hour. '
            'Please ensure that the user file ({}) and the secret file ({}) exist in your setup. '
            'The file names must match exactly, including the file extension, which is not visible on all systems.'
            .format(configserver.get('user_file'), configserver.get('secret_file')))
def _learning_handle(self):
    learning_data = processor.dir_to_learning(configserver.get('learning_input'))
    if len(learning_data) == 0:
        QMessageBox.critical(
            None, 'No learning data',
            'Learning data not found ({}), so the application will not work. Please rerun the learning once the configuration has been fixed.'
            .format(configserver.get('learning_input')))

    processor.learning(learning_data)

    self._running = False
    self.runningChanged.emit(False)
    self._learning_running = False
    self.learningRunningChanged.emit(False)
def __init__(self, name):
    ##
    # \var config
    # \brief Holds the configuration.
    #
    # By default the config variable holds an empty dict, which can be used directly; however, it is
    # possible to replace it with anything that can be serialised as JSON (e.g. a list).
    self.config = dict()

    if name == '':
        logging.warning('Empty name')
        name = 'UNKNOWN'

    self._dir = configserver.get('model_path')
    self._path = self._dir + '/' + name + '.model'

    if os.path.isdir(self._dir) and os.path.exists(self._path):
        utility.check_stale_lock(self._path + '_LOCK')
        lock = lockfile.LockFile(self._path + '_LOCK')
        with lock:
            with open(self._path, 'r') as file:
                try:
                    self.config = json.load(file)
                except ValueError:
                    logging.error('Cannot load model {}'.format(self._path))
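# Illustrative counterpart to the loading code above (a sketch, not the project's actual save
# routine): persisting a model back to disk would follow the same lock-then-JSON pattern.
# The function name and signature are hypothetical.
def _save_model_sketch(path, config):
    utility.check_stale_lock(path + '_LOCK')
    lock = lockfile.LockFile(path + '_LOCK')
    with lock:
        with open(path, 'w') as file:
            json.dump(config, file)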
def main():
    data = processor.dir_to_learning(configserver.get('learning_input'))
    if len(data) == 0:
        logging.error('No learning data - aborting')
        return

    logging.log(configserver.output_log_level(), 'Learning started')
    logging.log(
        configserver.output_log_level(),
        'Depending on your system, the size of the learning data and the amount that needs to be downloaded, this might take a while. Please wait.'
    )
    processor.learning(data)
    logging.log(configserver.output_log_level(), 'Learning finished')
def getBoolConfig(self, key):
    return bool(int(self.settings.value('classifyhub/{}'.format(key), configserver.get(key))))
def getStringConfig(self, key):
    return str(self.settings.value('classifyhub/{}'.format(key), configserver.get(key)))
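# Hypothetical sketch (not part of the shown code): writing a changed value back would mirror
# the setValue()/configserver.set() calls used in load_classifyhub_settings above.
def setConfig_sketch(self, key, value):
    self.settings.setValue('classifyhub/{}'.format(key), value)
    configserver.set(key, value)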
QApplication.setOrganizationName('Top-Ranger')

load_classifyhub_settings()

qmlRegisterType(UIProxy, 'UIProxy', 1, 0, 'UIProxy')
qmlRegisterType(SettingsProxy, 'SettingsProxy', 1, 0, 'SettingsProxy')

# Catch any Python error to give an error message
try:
    # Test for the GitHub user and secret
    check_user_and_secret()

    # Test if we need to learn
    need_to_learn = True
    if os.path.exists(configserver.get('model_path')):
        for file in os.listdir(configserver.get('model_path')):
            if os.path.isfile(configserver.get('model_path') + '/' + file) and file.endswith('.model'):
                need_to_learn = False
                break

    if need_to_learn:
        QMessageBox.information(
            None, 'No models',
            'It seems that no models are present on your system ({}), which means that the learning process has not been run yet.\n'
            'The learning process will be started now. This might take some time.'
            .format(configserver.get('model_path')))
        _learning_needed = True

    view = QQmlApplicationEngine()
def main():
    # Open output file
    file = None
    try:
        file = open(configserver.get('output'), 'w')
    except OSError:
        logging.error('Cannot save results to {}'.format(configserver.get('output')))

    # Prepare data
    data = processor.dir_to_learning(configserver.get('learning_input'))
    if len(data) == 0:
        logging.error('No learning data - aborting')
        return

    k_fold = configserver.get('k-fold')
    if k_fold < 2:
        logging.error('k-fold must be at least 2 (is: {})'.format(k_fold))
        return

    logging.log(configserver.output_log_level(), 'Starting validation ({}-fold cross-validation)'.format(k_fold))
    logging.log(
        configserver.output_log_level(),
        'Depending on your system, the size of the learning/validation data and the amount that needs to be downloaded, this might take a while. Please wait.'
    )
    if file is not None:
        file.write('Starting validation ({}-fold cross-validation)\n'.format(k_fold))
        file.flush()

    # Randomly distribute the data over k datasets
    datasets = [[] for i in range(k_fold)]
    for d in data:
        datasets[random.randint(0, k_fold - 1)] += [d]

    # Run k-fold cross-validation
    precision = utility.get_zero_class_dict()
    recall = utility.get_zero_class_dict()

    for run in range(k_fold):
        logging.log(configserver.output_log_level(), 'Starting validation run {}'.format(run + 1))
        if file is not None:
            file.write('Starting validation run {}\n'.format(run + 1))
            file.flush()

        learn = []
        truth = []

        # Create datasets for this run
        for i in range(k_fold):
            if i == run:
                truth = datasets[i]
            else:
                learn += datasets[i]

        # Remove labels
        validate = [x[0] for x in truth]

        # Learn
        processor.learning(learn)

        # Classify the validation data set
        result = processor.batch(validate)

        # Accumulate results of this run
        for c in utility.get_classes():
            precision_result = calculate_precision(truth, result, c)
            recall_result = calculate_recall(truth, result, c)
            if file is not None:
                file.write('{:6} - precision: {:6.4f}, recall: {:6.4f}\n'.format(c, precision_result, recall_result))
            precision[c] += precision_result
            recall[c] += recall_result

        if file is not None:
            file.write('\n')
            file.flush()

    # Calculate average
    for c in utility.get_classes():
        precision[c] /= k_fold
        recall[c] /= k_fold

    # Print results
    logging.log(configserver.output_log_level(), 'Average results from {}-fold cross-validation:'.format(k_fold))

    precision_avg = 0.0
    recall_avg = 0.0

    if file is not None:
        file.write('Average results from {}-fold cross-validation:\n'.format(k_fold))

    for c in utility.get_classes():
        precision_avg += precision[c]
        recall_avg += recall[c]
        logging.log(configserver.output_log_level(), '{:6} - precision: {:6.4f}, recall: {:6.4f}'.format(c, precision[c], recall[c]))
        if file is not None:
            file.write('{:6} - precision: {:6.4f}, recall: {:6.4f}\n'.format(c, precision[c], recall[c]))

    precision_avg /= len(utility.get_classes())
    recall_avg /= len(utility.get_classes())

    logging.log(configserver.output_log_level(), '{:6} - precision: {:6.4f}, recall: {:6.4f}'.format('ALL', precision_avg, recall_avg))

    # Close file if open
    if file is not None:
        file.write('{:6} - precision: {:6.4f}, recall: {:6.4f}\n'.format('ALL', precision_avg, recall_avg))
        file.write('\n')
        file.close()
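# Illustrative sketches only - the real calculate_precision()/calculate_recall() are not shown in
# this excerpt. They assume truth is a list of (data, label) pairs (as suggested by the
# "validate = [x[0] for x in truth]" line above) and result is a list of (data, predicted_label)
# pairs produced by processor.batch(), with hashable data elements.
def calculate_precision_sketch(truth, result, c):
    labels = dict(truth)
    predicted_c = [r for r in result if r[1] == c]
    if len(predicted_c) == 0:
        return 0.0
    true_positive = sum(1 for r in predicted_c if labels.get(r[0]) == c)
    return true_positive / len(predicted_c)


def calculate_recall_sketch(truth, result, c):
    predictions = dict(result)
    actual_c = [t for t in truth if t[1] == c]
    if len(actual_c) == 0:
        return 0.0
    true_positive = sum(1 for t in actual_c if predictions.get(t[0]) == c)
    return true_positive / len(actual_c)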