Exemplo n.º 1
0
def main(train_generator_list, labels, elements_count, classifier_name, k, smoothing_param, distancematrix, test_generator):
    '''
    Train the ML-kNN classifier selected by name and evaluate it on test data.

    @type train_generator_list: sequence of records
    @param train_generator_list: training records; each record is indexed
        with the key 'an' to look up its position in the distance matrix

    @param labels: collection of labels; only its length is used (passed to
        multilabel_evaluate)

    @param elements_count: not used by this function

    @type classifier_name: string
    @param classifier_name: one of 'mlknn_basic', 'mlknn_threshold',
        'mlknn_tensembled', 'mlknn-basic-tree', 'mlknn-threshold-tree',
        'mlknn-tensembled-tree'; the '-tree' variants wrap the classifier
        in MlHierarchical

    @param k: value convertible with int() for the basic/threshold variants;
        a comma-separated string of integers for the tensembled variants

    @param smoothing_param: forwarded to the basic/threshold classifiers
        (the tensembled classifier does not receive it)

    @param distancematrix: argument handed to fread_smatrix, which yields
        (rows, cols, data)

    @param test_generator: test records, handed to multilabel_evaluate

    @return: tuple (accuracy, precision, recall, hammingloss, subset01loss,
        fmeasure) as produced by multilabel_evaluate

    @raise ValueError: if classifier_name is not one of the supported names
    '''
    # Supported names mapped to (classifier family, wrap in MlHierarchical?).
    variants = {
        'mlknn_basic': ('basic', False),
        'mlknn_threshold': ('threshold', False),
        'mlknn_tensembled': ('tensembled', False),
        'mlknn-basic-tree': ('basic', True),
        'mlknn-threshold-tree': ('threshold', True),
        'mlknn-tensembled-tree': ('tensembled', True),
    }
    # Fail early with a clear message instead of hitting an unbound
    # 'classifier' variable after the distance matrix has been loaded.
    if classifier_name not in variants:
        raise ValueError("Unknown classifier name: %r (expected one of: %s)"
                         % (classifier_name, ', '.join(sorted(variants))))
    family, hierarchical = variants[classifier_name]

    PRINTER("Finding label list...")
    get_labels_of_record = mc2lmc_tomka_blad

    PRINTER("Loading distance matrix...")
    import sys
    # Make the parent directory importable; avoid stacking duplicates when
    # main() is called repeatedly.
    if r'../' not in sys.path:
        sys.path.append(r'../')
    from data_io.matrix_io import fread_smatrix
    (rows, cols, data) = fread_smatrix(distancematrix)
    id2rowind, id2colind = {}, {}
    for ind, rec_id in enumerate(rows):
        id2rowind[rec_id] = ind
    for ind, rec_id in enumerate(cols):
        id2colind[rec_id] = ind

    PRINTER("Training classifier...")
    from time import time

    def printer(x):
        logging.info('['+classifier_name+']'+x)

    def distance(a, b):
        # Look the pair up as (row of a, column of b); if that cell cannot
        # be indexed, retry with the indices swapped.
        row, col = id2rowind[a['an']], id2colind[b['an']]
        try:
            return data[row][col]
        except (IndexError, KeyError):
            return data[col][row]

    def neighbours_in(train_gen):
        # Bind the training set so the classifier only passes (sample, k).
        def get_neighbours(sample, k):
            return find_closest_points_sorted(sample, train_gen, [sample], k, distance)
        return get_neighbours

    start = time()
    if family == 'tensembled':
        # A real list (not a lazy map object) so it can be logged and reused.
        k = [int(item) for item in k.strip().split(',')]
        PRINTER("loaded k-list: "+str(k))
        from mlknn import mlknn_tensembled

        def build_classifier(train_gen, get_labels_of_record_arg):
            return mlknn_tensembled.MlknnTEnsembled(train_gen, neighbours_in(train_gen), k,
                                                    get_labels_of_record_arg, lambda x: 1, printer)
    else:
        k = int(k)
        if family == 'basic':
            from mlknn import mlknn_basic
            knn_class = mlknn_basic.MlknnBasic
        else:
            from mlknn import mlknn_threshold
            knn_class = mlknn_threshold.MlknnThreshold

        def build_classifier(train_gen, get_labels_of_record_arg):
            return knn_class(train_gen, neighbours_in(train_gen), k, smoothing_param,
                             get_labels_of_record_arg, lambda x: 1, printer)

    if hierarchical:
        # Label prefixes of length 2, 3 and the full label, in that order.
        label_mappings = (lambda x: x[:2], lambda x: x[:3], lambda x: x)
        from mltools.ml_hierarchical import MlHierarchical
        classifier = MlHierarchical(train_generator_list, build_classifier, label_mappings, get_labels_of_record)
    else:
        classifier = build_classifier(train_generator_list, get_labels_of_record)

    # Elapsed time is "now minus start"; the reverse order yields a negative value.
    PRINTER("Time taken for training:"+str(time()-start))
    PRINTER("------------------------")
    PRINTER("---Testing classifier---")
    PRINTER("------------------------")

    classify_oracle = mc2lmc_tomka_blad
    from mltools.multilabel_evaluate import multilabel_evaluate, multilabel_evaluate_printresults
    accuracy, precision, recall, hammingloss, subset01loss, fmeasure = multilabel_evaluate(
        lambda: test_generator, classify_oracle, classifier.classify, len(labels),
        [('full label', lambda x: x), ('half label', lambda x: x[:3]), ('low label', lambda x: x[:2])])
    PRINTER("-----------RESULTS-----------")
    multilabel_evaluate_printresults(accuracy, precision, recall, hammingloss, subset01loss, fmeasure, PRINTER)
    return accuracy, precision, recall, hammingloss, subset01loss, fmeasure
Exemplo n.º 2
0
def jrs_evaluate(results_oracle, results_classifier):
    '''
    Evaluate a multilabel classifier with per-object averaged
    precision, recall and F1.

    For every (oracle, prediction) pair the label lists are compared as
    sets. A ratio whose denominator is zero (empty prediction, empty oracle,
    or precision + recall == 0) counts as 0.0. The per-object values are
    then averaged over all objects.

    @type results_oracle: list of lists of strings
    @param results_oracle: labels assigned to each consecutive object by
        an expert

    @type results_classifier: list of lists of strings
    @param results_classifier: labels assigned to each consecutive object by
        the classifier being evaluated

    @return: tuple (0.0, avg_precision, avg_recall, 0.0, 0.0, avg_f1); the
        three 0.0 slots are constant placeholders (presumably the accuracy,
        hamming-loss and subset-0/1-loss positions of multilabel_evaluate's
        result - confirm against callers). All zeros when there are no
        objects to evaluate.
    '''
    precs = []
    recals = []
    f1s = []
    # zip stops at the shorter input, so extra entries in either list are ignored.
    for oracle, pred in zip(results_oracle, results_classifier):
        oracle_set = _label_set(oracle)
        pred_set = _label_set(pred)
        hits = float(len(oracle_set & pred_set))

        prec = hits / len(pred_set) if pred_set else 0.0
        recall = hits / len(oracle_set) if oracle_set else 0.0
        total = prec + recall
        f1 = 2.0 * prec * recall / total if total else 0.0

        precs.append(prec)
        recals.append(recall)
        f1s.append(f1)

    # No objects at all: report zeros instead of dividing by zero.
    if not precs:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

    count = len(precs)
    avg_prec = sum(precs) / count
    avg_recal = sum(recals) / count
    avg_f1 = sum(f1s) / count

    # An alternative evaluation through multilabel_evaluate used to follow
    # this return; it could never execute and has been dropped.
    return 0.0, avg_prec, avg_recal, 0.0, 0.0, avg_f1


def _label_set(labels):
    '''Return labels as a set; a non-iterable entry (e.g. None) counts as no labels.'''
    try:
        return set(labels)
    except TypeError:
        return set()