Exemplo n.º 1
0
def learning(input):
    """Distribute all classifiers over a set of worker processes.

    The number of processes is ``configserver.get('number_worker')`` when that
    value is positive, otherwise ``multiprocessing.cpu_count()``. Every entry
    returned by ``classifier.get_all_classifiers()`` is put on a shared queue;
    each process runs ``_learning_worker(input, queue)``. After all processes
    have been joined, an error is logged if any of them ended with a non-zero
    exit code.

    :param input: data handed unchanged to every worker process
    """
    worker_count = multiprocessing.cpu_count()
    configured_count = configserver.get('number_worker')
    if configured_count > 0:
        worker_count = configured_count

    # Fill the shared queue before any worker is started
    pending = multiprocessing.Queue()
    for entry in classifier.get_all_classifiers():
        pending.put_nowait(entry)

    pool = [
        multiprocessing.Process(target=_learning_worker, args=(input, pending))
        for _ in range(worker_count)
    ]

    for proc in pool:
        proc.start()

    # Wait for every worker and remember those that did not exit cleanly
    crashed = []
    for proc in pool:
        proc.join()
        if proc.exitcode != 0:
            crashed.append(proc)

    if crashed:
        logging.error('{} processes have failed - result might not be complete'.format(len(crashed)))
Exemplo n.º 2
0
def _settings_value_to_bool(value):
    """Convert a value read from QSettings into a bool.

    Besides everything ``bool(int(value))`` accepts, the texts ``'true'`` and
    ``'false'`` (any case, surrounding whitespace ignored) are understood.
    NOTE(review): some QSettings backends appear to hand stored booleans back
    as exactly these strings - confirm on the target platforms.

    :param value: value as returned by ``QSettings.value``
    :return: the boolean interpretation of ``value``
    :raises ValueError: if ``value`` is neither numeric nor 'true'/'false'
    """
    if isinstance(value, str):
        text = value.strip().lower()
        if text == 'true':
            return True
        if text == 'false':
            return False
    return bool(int(value))


def load_classifyhub_settings():
    """Merge command line arguments and persisted settings into configserver.

    The configuration is captured before ``configserver.parse_args()`` runs.
    Afterwards, for every key of that earlier configuration:

    * value unchanged by parsing: the value stored under ``classifyhub/<key>``
      in ``QSettings`` (falling back to the earlier value) is converted to the
      type of the current value (bool, int or str) and written to
      configserver. Values of any other type are left untouched.
    * value changed by parsing: the new value is written to ``QSettings``.
    """
    settings = QSettings()
    old_config = configserver.get_config()
    configserver.parse_args()

    # Map from the type of the current value to the conversion applied to the
    # stored setting. bool must be tested before int (bool is an int subclass).
    converters = (
        (bool, _settings_value_to_bool),
        (int, int),
        (str, str),
    )

    # Iterate over a snapshot so that configserver.set() can never disturb the
    # iteration, whatever object get_config() handed out.
    for key in list(old_config.keys()):
        settings_key = 'classifyhub/{}'.format(key)
        data = configserver.get(key)

        if data == old_config[key]:
            # This key wasn't changed by cmd arguments - so try to load it from config
            for value_type, convert in converters:
                if isinstance(data, value_type):
                    configserver.set(
                        key,
                        convert(settings.value(settings_key, old_config[key])))
                    break
        else:
            # Save cmd options so no confusion will occur for user
            settings.setValue(settings_key, data)
Exemplo n.º 3
0
def batch(input):
    """Process all elements of ``input`` in parallel and collect the results.

    Every element is put on an input queue that is consumed by
    ``_batch_worker`` processes; their output queue is drained until no
    process is alive any more. The number of processes is
    ``configserver.get('number_worker')`` when positive, otherwise the CPU
    count. Errors are logged when a process exits with a non-zero code or
    when the number of results differs from the number of inputs.

    :param input: sized collection of elements to process
    :return: list of everything the workers put on the output queue
             (an empty list for empty input)
    """
    if len(input) == 0:
        return []

    worker_count = multiprocessing.cpu_count()
    configured_count = configserver.get('number_worker')
    if configured_count > 0:
        worker_count = configured_count

    queue_input = multiprocessing.Queue()
    queue_output = multiprocessing.Queue()

    for element in input:
        queue_input.put_nowait(element)

    pool = [
        multiprocessing.Process(target=_batch_worker, args=(queue_input, queue_output))
        for _ in range(worker_count)
    ]

    for proc in pool:
        proc.start()

    result = []

    def drain_output():
        # Pull elements until the queue stayed empty for one second
        while True:
            try:
                result.append(queue_output.get(True, 1))
            except queue.Empty:
                return

    # We have to pull all elements out of the queue or else the process might not terminate
    # See https://docs.python.org/3/library/multiprocessing.html#programming-guidelines
    while True:
        drain_output()
        if not any(proc.is_alive() for proc in pool):
            break

    crashed = []
    for proc in pool:
        proc.join()
        if proc.exitcode != 0:
            crashed.append(proc)

    if crashed:
        logging.error('{} processes have failed - result might not be complete'.format(len(crashed)))

    # Try again just in case we missed some elements
    drain_output()

    if len(result) != len(input):
        logging.error('Expected {} results, got {} - some results are missing'.format(len(input), len(result)))

    return result
Exemplo n.º 4
0
def check_stale_lock(lock_path, time=20):
    """Break the lock at ``lock_path`` if it cannot be acquired within ``time``.

    A second lock, ``lock_path + '_STALE'``, is held while the actual lock is
    tested and is released again at the end.

    :param lock_path: path passed to ``lockfile.LockFile`` for the lock to test
    :param time: timeout passed to ``acquire`` for the tested lock (presumably
                 seconds - verify against the lockfile documentation); the
                 timeout of the ``_STALE`` lock is derived from it
    """
    # Number of workers: the configured value if positive, else the CPU count
    worker = multiprocessing.cpu_count()
    if configserver.get('number_worker') > 0:
        worker = configserver.get('number_worker')

    lock = lockfile.LockFile(lock_path)
    test_lock = lockfile.LockFile(lock_path + '_STALE')
    # Retry until this caller holds the '_STALE' lock
    while not test_lock.i_am_locking():
        try:
            try:
                # Because this is longer than the lock timeout, we are sure that this is stale
                # (the timeout grows with the worker count and has a random part added)
                test_lock.acquire(timeout=time + time / 5 * worker +
                                  (time / 2) * random.random())
            except lockfile.LockTimeout:
                test_lock.break_lock()
        except Exception as e:
            # Any other failure is only logged; the loop then tries again
            logging.debug('Error at locking stale test log: {}'.format(e))
            pass

    # Probe the actual lock: if it can be taken in time it is released right
    # away, otherwise it is broken.
    try:
        lock.acquire(timeout=time)
        lock.release()
    except lockfile.LockTimeout:
        logging.debug('Breaking log {}'.format(lock_path))
        try:
            lock.break_lock()
        except lockfile.NotLocked:
            pass

    try:
        test_lock.release()
    except lockfile.NotLocked:
        # Someone broke it - not bad
        pass
Exemplo n.º 5
0
def main():
    """Run batch processing from the configured input file to the output file.

    The file named by ``configserver.get('input')`` is converted with
    ``processor.file_to_input``; if that yields nothing an error is logged and
    the function returns. Otherwise the data is processed with
    ``processor.batch`` and the result is written to
    ``configserver.get('output')``.
    """
    def announce(message):
        # Progress messages use the log level provided by configserver
        logging.log(configserver.output_log_level(), message)

    records = processor.file_to_input(configserver.get('input'))
    if not len(records):
        logging.error('No learning data - aborting')
        return

    announce('Batch processing started')
    outcome = processor.batch(records)
    processor.result_to_file(outcome, configserver.get('output'))
    announce('Batch processing finished')
Exemplo n.º 6
0
def check_user_and_secret():
    """Show a warning box when ``github.GithubSecret`` reports no secret.

    The warning names the files configured as ``user_file`` and
    ``secret_file``. Nothing happens when a secret is available.
    """
    if github.GithubSecret().secret_available:
        return

    text = (
        'Authentificated requests are not possible - this will limit you to only a few GitHub requests per hour. '
        'Please ensure that the user file ({}) and the secret file ({}) exist in your setup. '
        'The file names must match exactly, including the file extension, which is not visible on all systems.'
    ).format(configserver.get('user_file'), configserver.get('secret_file'))

    QMessageBox.warning(None, 'No authentification possible', text)
Exemplo n.º 7
0
    def _learning_handle(self):
        """Run the learning process and reset the running flags afterwards.

        Learning data is read from the directory configured as
        ``learning_input``. When none is found a critical message box is
        shown; ``processor.learning`` is called in either case. Finally the
        running and learning-running flags are cleared and their change
        signals are emitted with ``False``.
        """
        samples = processor.dir_to_learning(configserver.get('learning_input'))

        if not len(samples):
            text = (
                'Learning data not found ({}), the application will not work. Please rerun the learning once the configuration was fixed.'
            ).format(configserver.get('learning_input'))
            QMessageBox.critical(None, 'No learning data', text)

        processor.learning(samples)

        # Flag first, then notify, for each of the two states
        self._running = False
        self.runningChanged.emit(False)

        self._learning_running = False
        self.learningRunningChanged.emit(False)
Exemplo n.º 8
0
    def __init__(self, name):
        """Create the model and load its saved configuration if one exists.

        The model file is ``<model_path>/<name>.model`` where ``model_path``
        comes from configserver. If the directory and the file exist, the file
        is parsed as JSON into ``self.config`` while the lock file
        ``<path>_LOCK`` is held (a stale lock is checked for first). If loading
        fails an error is logged and ``self.config`` stays an empty dict.

        :param name: name of the model; an empty string is replaced by
                     ``'UNKNOWN'`` and a warning is logged
        """
        ##
        # \var config
        # \brief Holds the configuration.
        #
        # As default the conf variable holds an empty dict which can be used, however it is possible to replace
        # it with anything that can be serialised as an JSON (e.g. list).

        self.config = dict()

        # Compare by value: identity ("is") with a string literal only works by
        # accident of interning and triggers a SyntaxWarning on newer Pythons.
        if name == '':
            logging.warning('Empty name')
            name = 'UNKNOWN'

        self._dir = configserver.get('model_path')
        self._path = self._dir + '/' + name + '.model'

        if os.path.isdir(self._dir) and os.path.exists(self._path):
            utility.check_stale_lock(self._path + '_LOCK')
            lock = lockfile.LockFile(self._path + '_LOCK')
            with lock:
                with open(self._path, 'r') as file:
                    try:
                        self.config = json.load(file)
                    except Exception:
                        # Best effort: an unreadable model is reported and the
                        # empty default is kept. KeyboardInterrupt/SystemExit
                        # are deliberately no longer swallowed here.
                        logging.error('Can not load model {}'.format(self._path))
Exemplo n.º 9
0
def main():
    """Run the learning process on the configured learning input.

    Learning data is read with ``processor.dir_to_learning`` from the
    directory configured as ``learning_input``. Without data an error is
    logged and the function returns; otherwise ``processor.learning`` is
    called, framed by progress messages.
    """
    def announce(message):
        # Progress messages use the log level provided by configserver
        logging.log(configserver.output_log_level(), message)

    samples = processor.dir_to_learning(configserver.get('learning_input'))
    if not len(samples):
        logging.error('No learning data - aborting')
        return

    announce('Learning started')
    announce(
        'Depending on your system, the size of learning data and the amount that needs to be downloaded this might take a while. Please wait.'
    )
    processor.learning(samples)
    announce('Learning finished')
Exemplo n.º 10
0
 def getBoolConfig(self, key):
     """Return the setting ``classifyhub/<key>`` as a bool.

     The value from ``configserver.get(key)`` is used as default when the
     setting is not stored. Besides numeric values, the texts ``'true'`` and
     ``'false'`` (any case) are accepted.
     NOTE(review): some QSettings backends appear to return stored booleans
     as these strings, which ``int()`` alone rejects - confirm on the target
     platforms.

     :param key: configuration key without the ``classifyhub/`` prefix
     :return: boolean interpretation of the stored (or default) value
     :raises ValueError: if the value is neither numeric nor 'true'/'false'
     """
     value = self.settings.value('classifyhub/{}'.format(key),
                                 configserver.get(key))
     if isinstance(value, str):
         text = value.strip().lower()
         if text == 'true':
             return True
         if text == 'false':
             return False
     return bool(int(value))
Exemplo n.º 11
0
 def getStringConfig(self, key):
     """Return the setting ``classifyhub/<key>`` converted with ``str``.

     The value from ``configserver.get(key)`` is used as default when the
     setting is not stored.

     :param key: configuration key without the ``classifyhub/`` prefix
     """
     settings_key = 'classifyhub/{}'.format(key)
     fallback = configserver.get(key)
     return str(self.settings.value(settings_key, fallback))
Exemplo n.º 12
0
    QApplication.setOrganizationName('Top-Ranger')

    load_classifyhub_settings()

    qmlRegisterType(UIProxy, 'UIProxy', 1, 0, 'UIProxy')
    qmlRegisterType(SettingsProxy, 'SettingsProxy', 1, 0, 'SettingsProxy')

    # Catch any python error to give an error message
    try:

        # test for secret
        check_user_and_secret()

        # Test if we need to learn
        need_to_learn = True
        if os.path.exists(configserver.get('model_path')):
            for file in os.listdir(configserver.get('model_path')):
                if os.path.isfile(configserver.get('model_path') + '/' +
                                  file) and file.endswith('.model'):
                    need_to_learn = False
                    break

        if need_to_learn:
            QMessageBox.information(
                None, 'No models',
                'It seems that no models are present on your system ({}), this means that the learning process has not been run yet.\n'
                'The learning process will be started now. This might take some time.'
                .format(configserver.get('model_path')))
            _learning_needed = True

        view = QQmlApplicationEngine()
Exemplo n.º 13
0
def _write_output(file, text, flush=False):
    """Write ``text`` to the optional result file; do nothing if it is None."""
    if file is None:
        return
    file.write(text)
    if flush:
        file.flush()


def _run_validation(file):
    """Perform the k-fold cross-validation, reporting to the log and ``file``.

    :param file: open text file for the report, or None to only log
    """
    # Prepare data
    data = processor.dir_to_learning(configserver.get('learning_input'))
    if len(data) == 0:
        logging.error('No learning data - aborting')
        return

    k_fold = configserver.get('k-fold')

    if k_fold < 2:
        logging.error('k-cross must be at least 2 (is: {})'.format(k_fold))
        return

    level = configserver.output_log_level()
    classes = utility.get_classes()
    line_format = '{:6} - precision: {:6.4f}, recall: {:6.4f}'

    logging.log(level,
                'Starting validation ({}-cross validation)'.format(k_fold))
    logging.log(
        level,
        'Depending on your system, the size of learning/validation data and the amount that needs to be downloaded this might take a while. Please wait.'
    )
    _write_output(
        file,
        'Starting validation ({}-cross validation)\n'.format(k_fold),
        flush=True)

    # Assign every sample to a randomly chosen fold
    datasets = [[] for _ in range(k_fold)]
    for d in data:
        datasets[random.randint(0, k_fold - 1)].append(d)

    # Run k-fold cross-validation
    precision = utility.get_zero_class_dict()
    recall = utility.get_zero_class_dict()

    for run in range(k_fold):
        logging.log(level, 'Starting validation run {}'.format(run + 1))
        _write_output(
            file, 'Starting validation run {}\n'.format(run + 1), flush=True)

        # Create datasets for run: one fold is held back, the rest is learned
        truth = datasets[run]
        learn = []
        for i, fold in enumerate(datasets):
            if i != run:
                learn.extend(fold)

        # Remove labels
        validate = [x[0] for x in truth]

        # Learn
        processor.learning(learn)

        # Calculate validation data set
        result = processor.batch(validate)

        # Cache results of this run
        for c in classes:
            precision_result = calculate_precision(truth, result, c)
            recall_result = calculate_recall(truth, result, c)

            _write_output(
                file,
                (line_format + '\n').format(c, precision_result,
                                            recall_result))

            precision[c] += precision_result
            recall[c] += recall_result

        _write_output(file, '\n', flush=True)

    # Calculate average
    for c in classes:
        precision[c] /= k_fold
        recall[c] /= k_fold

    # Print results
    logging.log(
        level, 'Average results from {}-fold cross-validation:'.format(k_fold))
    _write_output(
        file,
        'Average results from {}-fold cross-validation:\n'.format(k_fold))

    precision_avg = 0.0
    recall_avg = 0.0
    for c in classes:
        precision_avg += precision[c]
        recall_avg += recall[c]
        summary = line_format.format(c, precision[c], recall[c])
        logging.log(level, summary)
        _write_output(file, summary + '\n')

    precision_avg /= len(classes)
    recall_avg /= len(classes)
    summary = line_format.format('ALL', precision_avg, recall_avg)
    logging.log(level, summary)
    _write_output(file, summary + '\n')
    _write_output(file, '\n')


def main():
    """Run a k-fold cross-validation and report precision and recall.

    The report is logged and, if the file configured as ``output`` can be
    opened for writing, also written to that file. Learning data comes from
    the directory configured as ``learning_input`` and is split randomly into
    ``k-fold`` folds; each fold is validated once against a model learned from
    the remaining folds. The function aborts with an error log entry when
    there is no learning data or when ``k-fold`` is below 2.

    The output file is closed on every exit path, including the early aborts
    and exceptions raised during validation.
    """
    # Open Output file
    output_path = configserver.get('output')
    try:
        file = open(output_path, 'w')
    except OSError:
        file = None
        logging.error('Can not save results to {}'.format(output_path))

    try:
        _run_validation(file)
    finally:
        # Close file if open
        if file is not None:
            file.close()