Exemplo n.º 1
0
def mainKyle():
    """Run sequential forward feature selection on the Dubai/London drill-down.

    Loads the booking data file through ``FeatureFilter``, drills down using
    Dubai and London as both the origin and destination lists with cabin
    ``"Y"``, groups the result into unique flights, encodes them with
    ``encodeFlights``, builds a 3-fold split with ``kFoldSplit`` and prints
    whatever ``sequentialForwardFeatureSelection`` returns for a
    ``KNeighborsRegressor`` model.

    Returns:
        None. Progress messages and the selection result are printed.
    """
    # NOTE: every print below uses the single-argument ``print(...)`` form,
    # which prints the same text under Python 2 and is valid Python 3 syntax.

    # Set parameters for loading the data
    num_records = 'all'  # presumably "load every record" -- confirm in FeatureFilter
    csvfile = "Data/BKGDAT_ZeroTOTALBKD.txt"

    # Set parameters for filtering the data
    market = AirportCodes.London
    orgs = [AirportCodes.Dubai, market]
    dests = [AirportCodes.Dubai, market]
    cabins = ["Y"]

    # Get the data, filter it, and group it by flight
    print("Loading " + csvfile)
    f = FeatureFilter(num_records, csvfile)

    print("Filtering")
    data = f.getDrillDown(orgs=orgs, dests=dests, cabins=cabins)

    print("Grouping by flight")
    unique_flights = f.getUniqueFlights(data)

    # Encode the flights
    print("Encoding flight data")
    start = -90
    stop = 0
    num_points = 31
    interp_params = (start, stop, num_points)

    bin_size = 3
    date_reduction = 0
    cat_encoding = (bin_size, date_reduction)

    X, y, ids = encodeFlights(unique_flights, interp_params, cat_encoding)

    # The feature count is the second dimension of the first encoded entry.
    _, num_features = X[0].shape

    print('Generating k-fold')
    num_folds = 3
    kf = kFoldSplit(X, y, ids, num_folds)

    print('Selecting features')
    # The estimator class itself (not an instance) is handed to the selector.
    model = KNeighborsRegressor
    print(sequentialForwardFeatureSelection(model, kf, num_features))
Exemplo n.º 2
0
def mainRyan():
    """Build a train/test split of encoded flights for the configured market.

    Loads the booking data file through ``FeatureFilter``, drills down with
    cabin ``"Y"`` and an airport list that depends on ``market``, groups the
    result into unique flights, encodes them with ``encodeFlights`` and splits
    them with ``aggregateTrainTestSplit`` using ``0.75`` as the split argument
    (presumably the training fraction -- confirm against that helper).

    Returns:
        tuple: ``(X_train, y_train, X_test, y_test, ids_train, ids_test,
        interp_params, cat_encoding)`` where ``interp_params`` is
        ``(start, stop, num_points)`` and ``cat_encoding`` is
        ``(bin_size, date_reduction)`` as set in this function.
    """
    # NOTE: every print below uses the single-argument ``print(...)`` form,
    # which prints the same text under Python 2 and is valid Python 3 syntax.

    # Set parameters for loading the data
    num_records = 'all'  # presumably "load every record" -- confirm in FeatureFilter
    csvfile = "Data/BKGDAT_ZeroTOTALBKD.txt"

    # Set parameters for filtering the data.
    # Setting ``market`` to None selects the five-airport list below instead
    # of the Dubai/<market> pair.
    market = AirportCodes.Frankfurt
    cabins = ["Y"]

    # Get the data, filter it, and group it by flight
    print("Loading " + csvfile)
    f = FeatureFilter(num_records, csvfile)

    print("Filtering")
    if market is None:
        airports = [
            AirportCodes.Dubai,
            AirportCodes.London,
            AirportCodes.Bahrain,
            AirportCodes.Frankfurt,
            AirportCodes.Bangkok,
        ]
    else:
        airports = [AirportCodes.Dubai, market]

    # Origins and destinations use the same airports; separate list objects
    # are passed, as in the original code, so the callee never sees aliases.
    orgs = list(airports)
    dests = list(airports)
    data = f.getDrillDown(orgs=orgs, dests=dests, cabins=cabins)

    print("Grouping by flight")
    unique_flights = f.getUniqueFlights(data)

    # Encode the flights
    print("Encoding flight data")
    start = -90
    stop = 0
    num_points = 31
    interp_params = (start, stop, num_points)

    bin_size = 3
    date_reduction = 0
    cat_encoding = (bin_size, date_reduction)

    X, y, ids = encodeFlights(unique_flights, interp_params, cat_encoding)
    X_train, y_train, X_test, y_test, ids_train, ids_test = aggregateTrainTestSplit(X, y, ids, 0.75)

    return X_train, y_train, X_test, y_test, ids_train, ids_test, interp_params, cat_encoding