示例#1
0
if __name__ == '__main__':
    trainsize = 5000
    testsize = 5000
    numruns = 3

    classalgs = {  #'Random': algs.Classifier(),
        'Naive Bayes':
        algs.NaiveBayes({'usecolumnones': False}),
        'Naive Bayes Ones':
        algs.NaiveBayes({'usecolumnones': True}),
        'Linear Regression':
        algs.LinearRegressionClass(),
        'Logistic Regression Reg':
        algs.LogitReg({
            'regularizer': 'l2',
            'lamb': 0.001,
            'stepsize': 0.001
        }),
        'Logistic Regression':
        algs.LogitReg({
            'lamb': 0.001,
            'stepsize': 0.001
        }),
        'kernel Logistic Regression':
        algs.KernelLogitReg({'k': 30}),
        'Hamming kernel Logistic Regression':
        algs.KernelLogitReg({
            'kernel': 'hamming',
            'k': 20
        }),
        'Neural Network':
示例#2
0

if __name__ == '__main__':
    trainsize = 5000
    testsize = 5000
    numruns = 10

    classalgs = {  #'Random': algs.Classifier(),
        #'Naive Bayes': algs.NaiveBayes({'usecolumnones': False})
        #'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True})
        #'Linear Regression': algs.LinearRegressionClass(),
        #'Logistic Regression': algs.LogitReg()
        #'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'})
        #'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'})
        'ElasticNet Logistic Regression':
        algs.LogitReg({'regularizer': 'ElasticNet'})
        #'Logistic Alternative': algs.LogitRegAlternative()
        #'Neural Network': algs.NeuralNet({'epochs': 1})
    }
    numalgs = len(classalgs)

    parameters = (
        {
            'regwgt': 0.0,
            'nh': 4
        },
        {
            'regwgt': 0.01,
            'nh': 8
        },
        {
示例#3
0
    for i in range(len(ytest)):
        if ytest[i] == predictions[i]:
            correct += 1
    return (correct / float(len(ytest))) * 100.0


def loadsusy():
    """Load the CSV dataset from its hard-coded path and split it.

    The file is parsed as comma-delimited numeric data with
    ``np.genfromtxt`` and the resulting array is handed to ``splitdataset``.

    Returns:
        tuple: ``(trainset, testset)`` exactly as produced by
        ``splitdataset``.
    """
    # Raw string literals: the original literal mixed escaped backslashes with
    # the invalid escape sequence "\P", which modern Python versions warn
    # about. The resulting path value is unchanged.
    csv_path = (r'C:\Users\Nandini\Documents\Textbooks\Project BD'
                r'\Classifiers-implemented-master\output2.csv')
    dataset = np.genfromtxt(csv_path, delimiter=',')
    trainset, testset = splitdataset(dataset)
    return trainset, testset


if __name__ == '__main__':
    # Script entry point: train every configured learner on the loaded
    # training split and report its accuracy on the test split.
    trainset, testset = loadsusy()
    # Format the message *before* printing. The original called .format() on
    # the result of print(...), which only works as a Python 2 print statement
    # and raises AttributeError on Python 3 (print() returns None).
    print('Running on train={0} and test={1} samples'.format(
        trainset[0].shape[0], testset[0].shape[0]))
    classalgs = {
        'Logistic Regression': algs.LogitReg(),
    }

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for learnername, learner in classalgs.items():
        print('Running learner = ' + learnername)
        # Train model; index 0 of each split holds the inputs and index 1 the
        # targets, as used by the learn/predict/getaccuracy calls below.
        dividedDS = learner.learn(trainset[0], trainset[1])
        predictions = learner.predict(testset[0])
        accuracy = getaccuracy(testset[1], predictions)
        print('Accuracy for ' + learnername + ': ' + str(accuracy))
 dataset = np.genfromtxt('datasets/numericsequence.csv', delimiter=',')
 # dataset = dtl.load_occupancy_dataset()
 # dtl.load_occupancy_dataset(trainsize, testsize)
 np.random.shuffle(dataset)
 errors = {}
 # accuracies = []
 classalgs = {}
 numparams = 0
 parameters = {}
 for i in range(5):
     smallDataSet = dataset[i * 1300:(i + 1) * 1300]
     classalgs = {
         'Naive Bayes':
         algs.NaiveBayes({'usecolumnones': False}),
         'Logistic Regression':
         algs.LogitReg(),
         'Neural Network':
         algs.NeuralNet({
             'epochs': 100,
             'stepsize': 0.01,
             'nh': 8,
             'ni': 19
         })
         # 'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}),
         # 'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}),
     }
     numalgs = len(classalgs)
     parameters = (
         # {'regwgt': 0.0, 'nh': 4},
         {
             'regwgt': 0.01,
def classify():
    """Train the configured learners on the CSV data and plot accuracy curves.

    Steps performed:

    1. Read ``data.csv`` through ``dp.readcsv`` and fetch the ``ia_success``
       target column and the feature columns through ``cc``.
    2. Convert both to float arrays and shuffle them with a fixed seed.
    3. Split into train (first half), validation (next quarter) and test
       (last quarter).
    4. For every run, parameter set and learner: reset the learner, train it,
       predict on the test split and record the accuracy.
    5. If plotting is enabled, load the curves stored in ``learning_acc.pkl``
       and plot validation/test/train accuracy against the step index.

    Returns:
        None. Results are printed and shown in a matplotlib window.
    """
    # init variables
    run = True
    plot = True
    numruns = 1
    dataset_file = "data.csv"

    classalgs = {'Logistic Regression': algs.LogitReg()}

    num_steps = 1
    parameters = (
        {
            'regularizer': 'None',
            'stepsize': 0.001,
            'num_steps': num_steps,
            'batch_size': 2
        },
    )
    numparams = len(parameters)

    # One (parameter set x run) accuracy table per learner.
    accuracy = {
        learnername: np.zeros((numparams, numruns))
        for learnername in classalgs
    }

    # load dataset & shuffle
    # NOTE(review): the return value was never used in the original either;
    # presumably readcsv populates the data that `cc` serves -- confirm.
    dp.readcsv(dataset_file)
    Y = cc.getData("ia_success")
    # Builtin float replaces the np.float alias, which was removed in
    # NumPy 1.24; the resulting dtype is the same.
    Y = np.array(Y).astype(float)
    # NOTE(review): this first feature selection is overwritten immediately
    # below. The call is kept only in case getListedDataList has side
    # effects -- confirm and delete if it does not.
    X = cc.getListedDataList([
        'fbp_CFB', 'fbp_CFC', 'fbp_HFI', 'fbp_RAZ', 'fbp_ROS', 'fbp_SFC',
        'fbp_TFC', 'fbp_HFI_class'
    ])
    # Feature columns actually used. An earlier, commented-out variant of
    # this list additionally started with 'assessment_result'.
    X = cc.getListedDataList([
        'max_size', 'first_size', 'first_status_held', 'sec_to_uc',
        'aircraft_n_Fixed', 'aircraft_n_Rotary', 'aircraft_n_total',
        'aircraft_hr_Fixed', 'aircraft_hr_Rotary', 'aircraft_hr_total',
        'n_firefighters', 'n_non_firefighters', 'hr_firefighters',
        'hr_non_firefighters', 'drop_amount_retardant', 'drop_amount_water',
        'drop_amount_total', 'n_fire_past_1', 'n_fire_past_7',
        'n_fire_past_30', 'response_time', 'general_cause', 'year', 'month',
        'latitude', 'longitude', 'assessment_size', 'fire_spread_rate',
        'fire_position_on_slope', 'temperature', 'relative_humidity',
        'wind_direction', 'wind_speed', 'weather_conditions_over_fire',
        'equipment_Transportation', 'equipment_Water_Delivery',
        'equipment_Sustained_Action', 'equipment_Fire_Guard_Building',
        'equipment_Crew_Gear', 'equipment_Base_Camp',
        'equipment_WaterTruck_Transportation', 'wstation_dry_bulb_temperature',
        'wstation_relative_humidity', 'wstation_wind_speed_kmh',
        'wstation_wind_direction', 'wstation_precipitation',
        'wstation_fine_fuel_moisture_code', 'wstation_duff_moisture_code',
        'wstation_drought_code', 'wstation_build_up_index',
        'wstation_initial_spread_index', 'wstation_fire_weather_index',
        'wstation_daily_severity_rating', 'fuelgrid_C', 'fuelgrid_D',
        'fuelgrid_M', 'fuelgrid_Nonfuel', 'fuelgrid_O', 'fuelgrid_S',
        'fuelgrid_Unclassified', 'fuelgrid_Water', 'fuel_type2',
        'grouped_fuel_type2', 'fbp_CFB', 'fbp_CFC', 'fbp_FD', 'fbp_HFI',
        'fbp_RAZ', 'fbp_ROS', 'fbp_SFC', 'fbp_TFC', 'fbp_HFI_class',
        'fuel_type', 'grouped_fuel_type', 'test_i'
    ])
    X = np.array(X).astype(float)

    # Re-seed before each shuffle so both arrays are shuffled from the same
    # RNG state. This is intended to keep the rows of X aligned with Y and
    # relies on both having the same length -- TODO confirm.
    np.random.seed(3111)
    np.random.shuffle(X)
    np.random.seed(3111)
    np.random.shuffle(Y)

    # 50% train / 25% validation / 25% test, split by position.
    trainX = X[:len(X) // 2]
    valX = X[len(X) // 2:len(X) * 3 // 4]
    testX = X[len(X) * 3 // 4:]

    trainY = Y[:len(Y) // 2]
    valY = Y[len(Y) // 2:len(Y) * 3 // 4]
    testY = Y[len(Y) * 3 // 4:]

    # Run
    if run:
        for r in range(numruns):
            print(
                ('Running on train={0}, val={1}, test={2} samples for run {3}'
                 ).format(trainX.shape[0], valX.shape[0], testX.shape[0], r))

            # test different parameters (only one for this assignment)
            for p, params in enumerate(parameters):
                # only one algorithm for now
                for learnername, learner in classalgs.items():
                    # Reset learner for new parameters
                    learner.reset(params)
                    print('Running learner = ' + learnername +
                          ' on parameters ' + str(learner.getparams()))
                    # Train model
                    learner.learn(trainX, trainY, valX, valY, testX, testY)
                    # Test model
                    predictions = learner.predict(testX)
                    acc = utils.getaccuracy(testY, predictions)
                    print('accuracy for ' + learnername + ': ' + str(acc))
                    accuracy[learnername][p, r] = acc

    # plot
    if plot:
        print("PLOT!")
        # NOTE(review): learning_acc.pkl is presumably written during
        # learner.learn -- confirm. Only unpickle files from a trusted source.
        # The context manager closes the handle, which the original leaked.
        with open("learning_acc.pkl", "rb") as acc_file:
            (accuracy_val, accuracy_test, accuracy_train, best_accuracy,
             best_weight) = pickle.load(acc_file)
        print("best_accuracy : val,train,test", accuracy_val, accuracy_train,
              accuracy_test)
        # x axis: one point per training step.
        epi = np.arange(0, num_steps, 1)
        plt.plot(epi, accuracy_val, label='validation accuracy : 1')
        plt.plot(epi, accuracy_test, label='test accuracy : 2')
        plt.plot(epi, accuracy_train, label='train accuracy : 3')
        plt.xlabel('epochs')
        plt.ylabel('Accuracy %')
        plt.legend()
        plt.show()
def geterror(ytest, predictions):
    """Return 100.0 minus ``getaccuracy(ytest, predictions)``.

    Args:
        ytest: Reference targets, forwarded unchanged to ``getaccuracy``.
        predictions: Predicted targets, forwarded unchanged to
            ``getaccuracy``.

    Returns:
        The complement of the accuracy value with respect to 100.0.
    """
    accuracy = getaccuracy(ytest, predictions)
    return 100.0 - accuracy


if __name__ == '__main__':
    trainsize = 70000
    testsize = 30000
    numruns = 3

    classalgs = {
        # 'Random': algs.Classifier(),
        # 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}),
        # 'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}),
        # 'Linear Regression': algs.LinearRegressionClass(),
        'Logistic Regression': algs.LogitReg(),
        'Radial Basis Transformation': algs.LogitReg(),
        # 'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}),
        # 'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}),
        # 'ElasticNet Logistic Regression': algs.LogitReg({'regularizer': 'elasticNet'}),
        # 'Logistic Alternative': algs.LogitRegAlternative(),
        # 'Neural Network': algs.NeuralNet({'epochs': 100, 'stepsize': 0.01, 'nh': 8, 'ni': 19})
    }
    numalgs = len(classalgs)

    parameters = (
        # {'regwgt': 0.0, 'nh': 4},
        {
            'regwgt': 0.01,
            'nh': 8
        },
def geterror(ytest, predictions):
    """Compute the error as the complement of the accuracy.

    Both arguments are passed straight through to ``getaccuracy``; the result
    is that accuracy value subtracted from 100.0.
    """
    acc = getaccuracy(ytest, predictions)
    error = 100.0 - acc
    return error


if __name__ == '__main__':
    trainsize = 5000
    testsize = 5000
    numruns = 10

    classalgs = {
        'Random': algs.Classifier(),
        'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}),
        'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}),
        'Linear Regression': algs.LinearRegressionClass(),
        'Logistic Regression': algs.LogitReg(),
        'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}),
        'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}),
        'Logistic Alternative': algs.LogitRegAlternative(),
        'Neural Network': algs.NeuralNet({'epochs': 100})
    }
    numalgs = len(classalgs)

    parameters = (
        {
            'regwgt': 0.0,
            'nh': 4
        },
        {
            'regwgt': 0.01,
            'nh': 8
示例#8
0
    obj = []
    #trainset, testset = loadmadelon()
    print('Running on train={0} and test={1} samples').format(
        trainset[0].shape[0], testset[0].shape[0])
    nnparams = {'ni': trainset[0].shape[1], 'nh': 64, 'no': 1}
    """type parameter should be L1,L2,None or Other"""
    """regwt should be user defined parameter"""

    lrparms = {'regwt': 0, 'type': "None"}
    classalgs = {
        'Random': algs.Classifier(),
        'Linear Regression': algs.LinearRegressionClass(),
        'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}),
        'Naive Bayes Ones': algs.NaiveBayes(),
        'My Classifier': algs.MyClassifier(),
        'Logistic Regression': algs.LogitReg(lrparms),
        'Neural Network': algs.NeuralNet(nnparams)
    }

    classalgs1 = collections.OrderedDict(sorted(classalgs.items()))

    for learnername, learner in classalgs1.iteritems():
        print 'Running learner = ' + learnername

        # Train model
        if learnername == "Linear Regression":
            lobj = learner

        if learnername == "Logistic Regression":
            learner.learn(trainset[0], trainset[1], lobj)
        else:
示例#9
0
    testsize = 5000
    numruns = 10

    classalgs = {
        'Random': algs.Classifier(),
        #'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}),
        #'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}),
        'Linear Regression': algs.LinearRegressionClass(),
        #'Logistic Regression': algs.LogitReg(),
        #'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}),
        #'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}),
        #'Logistic Alternative': algs.LogitRegAlternative(),
        #'Neural Network': algs.NeuralNet({'epochs': 100})
        #'RBF_linearRegression ': algs.RBF_linearRegression(),
        'RBF_LogitReg': algs.RBF_LogitReg(),
        'LogitReg': algs.LogitReg()
    }
    numalgs = len(classalgs)

    parameters = ({
        'beta': 0.5
    },
                  #{'beta':1.0},
                  #{'beta':2.0}
                  )
    numparams = len(parameters)

    errors = {}
    for learnername in classalgs:
        errors[learnername] = np.zeros((numparams, numruns))
示例#10
0
       """The choice of the number of folds should be user-input"""
       fold=10
    
       trainlabel=np.reshape(trainset[1],(-1,1))
       trset = np.hstack((trainset[0],trainlabel))
       numinputs = trset.shape[1]-1
       np.random.shuffle(trset)
       parts = [trset[i::fold] for i in xrange(fold)]
       obj=[] 
       print('Running on train={0} and test={1} samples').format(trainset[0].shape[0], testset[0].shape[0])
       parm_pass={'Neural Network':{'ni': trset.shape[1]-1, 'nh': 0, 'no': 1},
               'Logistic Regression':{'regwt':0,'type':"L2"}}
               
       classalgs = {'Linear Regression': algs.LinearRegressionClass(),
                    'Naive Bayes Ones': algs.NaiveBayes(),
                    'Logistic Regression': algs.LogitReg(parm_pass['Logistic Regression']),
                    'Neural Network': algs.NeuralNet(parm_pass['Neural Network'])
                 }
                 
       classalgs1 = collections.OrderedDict(sorted(classalgs.items())) 
        
       best_parm=[]
       
       for learnername , learner in classalgs1.iteritems():
        
           print 'Running learner = ' + learnername
        
#           # Train model
           parm_accuracy={}
        
           for j in range(0,len(parm_dict[learnername])):