Exemplo n.º 1
0
def model_selection_without_normalization(distance_funcs, Xtrain, ytrain, Xval, yval):
    """Select the best distance function and k for a KNN classifier.

    Every distance function in ``distance_funcs`` is combined with every odd
    ``k`` from 1 up to ``min(29, len(Xtrain) - 1)``. Each candidate is scored
    with ``f1_score`` on the validation split, and one progress line is
    printed per candidate. On equal scores the candidate evaluated first wins
    (earlier distance function, then smaller k).

    Args:
        distance_funcs: dict mapping a name to a distance function.
        Xtrain: List[List[int]] training features.
        ytrain: List[int] training labels.
        Xval: List[List[int]] validation features.
        yval: List[int] validation labels.

    Returns:
        Tuple ``(best_model, best_k, best_func)``: a KNN instance trained on
        ``Xtrain`` with the winning settings, the winning k, and the name
        (key of ``distance_funcs``) of the winning distance function.

    Raises:
        ValueError: if ``distance_funcs`` is empty or the training set has
            fewer than two points, so that no candidate can be evaluated.
    """
    # k never exceeds 29, and never exceeds n - 1 on small training sets.
    max_k = min(29, len(Xtrain) - 1)
    if max_k < 1 or not distance_funcs:
        raise ValueError(
            "model selection needs at least one distance function and "
            "at least two training points")

    # None means "nothing evaluated yet", so the first candidate is always
    # accepted, even when every validation score is 0.
    best_f1 = None
    best_k = None
    best_func = None

    for func_name, distance in distance_funcs.items():
        # One model per distance function, trained once; only k changes in
        # the inner loop.
        # NOTE(review): assumes KNN.train does not depend on k - confirm.
        model = KNN(1, distance)
        model.train(Xtrain, ytrain)

        for k in range(1, max_k + 1, 2):
            model.k = k
            valid_f1 = f1_score(yval, model.predict(Xval))

            print('[part 1.1] {name}\tk: {k:d}\t'.format(name=func_name, k=k) +
                  'valid: {valid_f1_score:.5f}'.format(valid_f1_score=valid_f1))
            print()

            if best_f1 is None or valid_f1 > best_f1:
                best_f1 = valid_f1
                best_k = k
                best_func = func_name

    # Report the winning function, not the last one that was iterated.
    print("bestk:  ", best_k, "bestfunc:  ", best_func)

    best_model = KNN(best_k, distance_funcs[best_func])
    best_model.train(Xtrain, ytrain)
    return best_model, best_k, best_func
Exemplo n.º 2
0
def model_selection_with_transformation(distance_funcs, scaling_classes, Xtrain, ytrain, Xval, yval):
    """Select the best scaler, distance function and k for a KNN classifier.

    For every scaler class in ``scaling_classes`` the training and validation
    features are transformed. Then every distance function in
    ``distance_funcs`` is combined with every odd ``k`` from 1 up to
    ``min(29, len(Xtrain) - 1)`` (inclusive, so n - 1 can be chosen when it is
    odd). Each candidate is scored with ``f1_score`` on the transformed
    validation split, and one progress line is printed per candidate. On
    equal scores the candidate evaluated first wins.

    Args:
        distance_funcs: dict mapping a name to a distance function.
        scaling_classes: dict mapping a name to a scaler class; the class is
            instantiated with no arguments and the instance is called on a
            feature list.
        Xtrain: List[List[int]] training features.
        ytrain: List[int] training labels.
        Xval: List[List[int]] validation features.
        yval: List[int] validation labels.

    Returns:
        Tuple ``(best_model, best_k, best_func, best_scaler)``: a KNN instance
        with the winning k and distance function, trained on the training
        features as transformed by the winning scaler; the winning k; the
        name of the winning distance function; the name of the winning
        scaler. The winning scaler class is also stored on the model as
        ``scale``.

    Raises:
        ValueError: if either dictionary is empty or the training set has
            fewer than two points, so that no candidate can be evaluated.
    """
    max_k = min(29, len(Xtrain) - 1)
    if max_k < 1 or not distance_funcs or not scaling_classes:
        raise ValueError(
            "model selection needs at least one scaler, one distance "
            "function and at least two training points")

    # None means "nothing evaluated yet", so the first candidate is always
    # accepted, even when every validation score is 0.
    best_f1 = None
    best_k = None
    best_func = None
    best_scaler = None
    best_Xtrain = None

    for scaling_name, scaler_cls in scaling_classes.items():
        # Same instance for both splits, training split first.
        # NOTE(review): a stateful scaler would presumably learn its
        # parameters from the first call - confirm against the scaler classes.
        scaler = scaler_cls()
        scaled_Xtrain = scaler(Xtrain)
        scaled_Xval = scaler(Xval)

        for func_name, distance in distance_funcs.items():
            # Train once per (scaler, distance function); only k varies below.
            # NOTE(review): assumes KNN.train does not depend on k - confirm.
            model = KNN(1, distance)
            model.train(scaled_Xtrain, ytrain)

            for k in range(1, max_k + 1, 2):
                model.k = k
                valid_f1 = f1_score(yval, model.predict(scaled_Xval))

                if best_f1 is None or valid_f1 > best_f1:
                    best_f1 = valid_f1
                    best_k = k
                    best_func = func_name
                    best_scaler = scaling_name
                    # Keep the features the winner was actually trained on.
                    best_Xtrain = scaled_Xtrain

                print('[part 1.2] {name}\t{scaling_name}\tk: {k:d}\t'.format(name=func_name, scaling_name=scaling_name, k=k) +
                      'valid: {valid_f1_score:.5f}'.format(valid_f1_score=valid_f1))
                print()

    # Rebuild the winner so the returned model uses the best distance function
    # and the best scaler's training data, not whatever was iterated last.
    best_model = KNN(best_k, distance_funcs[best_func])
    best_model.train(best_Xtrain, ytrain)
    # Stored as the scaler class (not an instance), as before.
    best_model.scale = scaling_classes[best_scaler]
    print("bestk:  ", best_k, "bestfunc:  ", best_func, "bestscale:  ", best_scaler)

    return best_model, best_k, best_func, best_scaler
Exemplo n.º 3
0
def model_selection_with_transformation(distance_funcs, scaling_classes,
                                        Xtrain, ytrain, Xval, yval):
    """Select the best scaler, distance function and k for a KNN classifier.

    For every scaler class in ``scaling_classes`` the training and validation
    features are transformed. Then every distance function in
    ``distance_funcs`` is combined with every odd ``k`` in
    ``1, 3, ..., < len(Xtrain)`` and scored with ``f1_score`` on the
    transformed validation split. The highest score wins; on an exactly equal
    score the candidate with the smaller k wins, otherwise the one evaluated
    first.

    After the search, summary lines for the selected configuration are
    printed; their format strings are unchanged.

    Args:
        distance_funcs: dict mapping a name to a distance function.
        scaling_classes: dict mapping a name to a scaler class; the class is
            instantiated with no arguments and the instance is called on a
            feature list.
        Xtrain: List[List[int]] training features.
        ytrain: List[int] training labels.
        Xval: List[List[int]] validation features.
        yval: List[int] validation labels.

    Returns:
        Tuple ``(best_model, best_k, best_func, best_scale)``: a KNN instance
        with the winning k and distance function, trained on the training
        features as transformed by the winning scaler; the winning k; the
        name of the winning distance function; the name of the winning
        scaler. The winning scaler class is also stored on the model as
        ``scaler``.

    Raises:
        ValueError: if either dictionary is empty or the training set has
            fewer than two points, so that no candidate can be evaluated.
    """
    # Odd k values up to n - 1; the range is deliberately not capped at 30
    # (the original notes say this follows the grading instructions).
    kvals = np.arange(1, len(Xtrain), 2)
    if len(kvals) == 0 or not distance_funcs or not scaling_classes:
        raise ValueError(
            "model selection needs at least one scaler, one distance "
            "function and at least two training points")

    # None means "nothing evaluated yet", so the first candidate is always
    # accepted, even when every validation score is 0.
    max_score = None
    best_scale = None
    best_distance = None
    best_func = None
    best_k = None
    best_Xtrain = None

    for scaling_name, new_scaler in scaling_classes.items():
        print(scaling_name, new_scaler)
        # Same instance for both splits, training split first.
        # NOTE(review): a stateful scaler would presumably learn its
        # parameters from the first call - confirm against the scaler classes.
        scaler = new_scaler()
        scaled_Xtrain = scaler(Xtrain)
        scaled_Xval = scaler(Xval)

        for name, f in distance_funcs.items():
            # Train once per (scaler, distance function); only k varies below.
            model = KNN(kvals[0], f)
            model.train(scaled_Xtrain, ytrain)

            for k in kvals:
                model.k = k
                valid_f1_score = f1_score(yval, model.predict(scaled_Xval))

                improved = (
                    max_score is None
                    or valid_f1_score > max_score
                    or (valid_f1_score == max_score and k < best_k)
                )
                if improved:
                    max_score = valid_f1_score
                    best_distance = f
                    best_k = k
                    best_func = name
                    best_scale = scaling_name
                    # Keep the features the winner was actually trained on.
                    best_Xtrain = scaled_Xtrain

    # Train the returned model on the data transformed by the winning scaler;
    # selection was done on scaled features, so raw features would not match.
    best_model = KNN(best_k, best_distance)
    best_model.scaler = scaling_classes[best_scale]
    best_model.train(best_Xtrain, ytrain)

    # Real training score of the selected model instead of a constant.
    train_f1_score = f1_score(ytrain, best_model.predict(best_Xtrain))

    # Dont change any print statement
    # The values below describe the selected configuration rather than the
    # last candidate visited by the loops.
    print('[part 1.1] {name}\tk: {k:d}\t'.format(name=best_func, k=best_k) +
          'train:{train_f1_score:.5f}\t'.format(
              train_f1_score=train_f1_score) +
          'valid: {valid_f1_score:.5f}'.format(valid_f1_score=max_score))

    print('[part 1.2] {name}\t{scaling_name}\tk: {k:d}\t'.format(
        name=best_func, scaling_name=best_scale, k=best_k) +
          'train: {train_f1_score:.5f}\t'.format(
              train_f1_score=train_f1_score) +
          'valid: {valid_f1_score:.5f}'.format(valid_f1_score=max_score))

    print()
    print('[part 1.2] {name}\t{scaling_name}\t'.format(
        name=best_func, scaling_name=best_scale) +
          'best_k: {best_k:d}\t'.format(best_k=best_k))
    print()
    return best_model, best_k, best_func, best_scale
Exemplo n.º 4
0
def model_selection_without_normalization(distance_funcs, Xtrain, ytrain, Xval,
                                          yval):
    """Select the best distance function and k for a KNN classifier.

    Every distance function in ``distance_funcs`` is combined with every odd
    ``k`` in ``1, 3, ..., < len(Xtrain)`` and scored with ``f1_score`` on the
    validation split. The highest score wins; on an exactly equal score the
    candidate with the smaller k wins, otherwise the one evaluated first.

    A short message is printed whenever the current best changes. After the
    search, summary lines for the selected configuration are printed; their
    format strings are unchanged.

    Args:
        distance_funcs: dict mapping a name to a distance function.
        Xtrain: List[List[int]] training features.
        ytrain: List[int] training labels.
        Xval: List[List[int]] validation features.
        yval: List[int] validation labels.

    Returns:
        Tuple ``(best_model, best_k, best_function)``: a KNN instance trained
        on ``Xtrain`` with the winning settings, the winning k, and the name
        (key of ``distance_funcs``) of the winning distance function.

    Raises:
        ValueError: if ``distance_funcs`` is empty or the training set has
            fewer than two points, so that no candidate can be evaluated.
    """
    kvals = np.arange(1, len(Xtrain), 2)
    if len(kvals) == 0 or not distance_funcs:
        raise ValueError(
            "model selection needs at least one distance function and "
            "at least two training points")

    # None means "nothing evaluated yet", so the first candidate is always
    # accepted, even when every validation score is 0.
    max_score = None
    best_distance = None
    best_function = None
    best_k = None

    for name, f in distance_funcs.items():
        # Train once per distance function; only k varies below.
        model = KNN(kvals[0], f)
        model.train(Xtrain, ytrain)

        for k in kvals:
            model.k = k
            valid_f1_score = f1_score(yval, model.predict(Xval))

            if max_score is None or valid_f1_score > max_score:
                # Strictly better score.
                max_score = valid_f1_score
                print("new valid score: ", valid_f1_score)
                best_distance = f
                best_function = name
                best_k = k
                print('**NEW BEST MODEL**')
            elif valid_f1_score == max_score and k < best_k:
                # Same score with a smaller k: prefer the simpler model.
                print("new valid score: ", valid_f1_score)
                best_distance = f
                best_function = name
                best_k = k

    best_model = KNN(best_k, best_distance)
    best_model.train(Xtrain, ytrain)

    # Real training score of the selected model instead of a constant.
    train_f1_score = f1_score(ytrain, best_model.predict(Xtrain))

    # Dont change any print statement
    # The values below describe the selected configuration rather than the
    # last candidate visited by the loops.
    print('[part 1.1] {name}\tk: {k:d}\t'.format(name=best_function, k=best_k) +
          'train: {train_f1_score:.5f}\t'.format(
              train_f1_score=train_f1_score) +
          'valid: {valid_f1_score:.5f}'.format(valid_f1_score=max_score))
    print('[part 1.1] {name}\tk: {k:d}\t'.format(name=best_function, k=best_k) +
          'train: {train_f1_score:.5f}\t'.format(
              train_f1_score=train_f1_score) +
          'valid: {valid_f1_score:.5f}'.format(valid_f1_score=max_score))

    print()
    print('[part 1.1] {name}\tbest_k: {best_k:d}\t'.format(name=best_function,
                                                           best_k=best_k))
    return best_model, best_k, best_function