Exemplo n.º 1
0
# --- Script-setup fragment: resolve library/resource paths, import helpers,
# --- and pick input/output directories from the command line or defaults.
# NOTE(review): `run_dir`, `path` (presumably sys.path), `debug_mode`,
# `running_on_codalab`, `argv`, `default_input_dir` and `default_output_dir`
# are defined earlier in the original script -- not visible in this excerpt.
lib_dir = os.path.join(run_dir, "lib")
res_dir = os.path.join(run_dir, "res")

# Our libraries
path.append (run_dir)
path.append (lib_dir)
import data_io                       # general purpose input/output functions
from data_io import vprint           # print only in verbose mode
from data_manager import DataManager # load/save data and get info about them
from models import MyAutoML          # example model from scikit learn (unused in this version)

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# current scikit-learn exposes this functionality as sklearn.model_selection.
from sklearn.cross_validation import *
from libscores import *

if debug_mode >= 4 or running_on_codalab: # Show library version and directory structure
    data_io.show_version()
    data_io.show_dir(run_dir)

# =========================== BEGIN PROGRAM ================================

if __name__=="__main__" and debug_mode<4:
    #### Check whether everything went well (no time exceeded)
    execution_success = True

    #### INPUT/OUTPUT: Get input and output directory names
    if len(argv)==1: # Use the default input and output directories if no arguments are provided
        input_dir = default_input_dir
        output_dir = default_output_dir
    else:
        # Command-line override: argv[1] = input dir, argv[2] = output dir
        # (output path normalized to an absolute path).
        input_dir = argv[1]
        output_dir = os.path.abspath(argv[2])
Exemplo n.º 2
0
    # NOTE(review): this fragment starts inside a conditional whose header is
    # outside this excerpt; `codalab_run_dir`, `path`, `debug_mode`, `argv`
    # and the default directories are defined earlier in the original script.
    run_dir=codalab_run_dir
    running_on_codalab = True
    print ("Running on Codalab!")
# Resolve the sample-code and resource directories relative to the run dir.
lib_dir = os.path.join(run_dir, "sample_code")
res_dir = os.path.join(run_dir, "res")

# Our libraries
path.append (run_dir)
path.append (lib_dir)
import data_io                       # general purpose input/output functions
from data_io import vprint           # print only in verbose mode
from data_manager import DataManager # load/save data and get info about them
from classifier import Classifier    # example models from scikit learn

if debug_mode >= 4 or running_on_codalab: # Show library version and directory structure
    data_io.show_version()
    data_io.show_dir(run_dir)

# =========================== BEGIN PROGRAM ================================

if __name__=="__main__" and debug_mode<4:
    #### Check whether everything went well (no time exceeded)
    execution_success = True

    #### INPUT/OUTPUT: Get input and output directory names
    if len(argv)==1: # Use the default input and output directories if no arguments are provided
        input_dir = default_input_dir
        output_dir = default_output_dir
    else:
        # Command-line override: argv[1] = input dir, argv[2] = output dir.
        input_dir = argv[1]
        output_dir = os.path.abspath(argv[2]);
Exemplo n.º 3
0
def predictSpatioTemporal(step_num, input_dir, output_dir, code_dir, \
                          ext = '.h5', verbose=True, debug_mode=0, \
                          time_budget = 300, max_samples = 0, \
                          AR_order = 1, I_order = 0, MA_order = 0, \
                          num_predicted_frames=8, \
                          save_model = False, cache_data = False, \
                          cache_dir = "", \
                          version = 0.1 ):
    ''' Main spatio-temporal prediction function.

    step_num
        Current file number n being processed Xn.h5.
    input_dir
        Input directory in which the training/adaptation data are found
        in two subdirectories train/ and adapt/
    output_dir
        Output directory in which we expect Yn+1.h5 predictions to be deposited.
        The next num_predicted_frames frames must be predicted.
    code_dir
        The directory to which the participant submissions are unzipped.
    ext
        The file extensions of input and output data
    verbose
        if True, debug messages are printed
    debug_mode
        0: run the code normally, using the time budget of the task
        1: run the code normally, but limit the time to max_time
        2: run everything, but do not train, use persistence
        3: just list the directories and program version
    time_budget
        Maximum total running time in seconds.
        The code should keep track of time spent and NOT exceed the time limit.
    max_samples
        Maximum number of training samples loaded.
        Allows you to limit the number of training samples read for speed-up.
    AR_order, I_order, MA_order
        The orders of an ARIMA model.
        Your training algorithm may be slow, so you may want to limit
        the window of past frames used.
        AR_order = 1 # Persistence is order 1
        I_order = 0
        MA_order = 0
    num_predicted_frames
        Number of frames to be predicted in the future.
    save_model
        Models can eventually be pre-trained and re-loaded (warm start).
    cache_data
        Data that were loaded in the past can be cached in some
        binary format for faster reload.
    cache_dir
        A directory where to cache data (defaults to code_dir when empty).
    version
        This code's version.

    Returns True when all steps completed within the time budget,
    False otherwise (backward compatible with the old 1/0 convention).
    '''
    #### Check whether everything went well (no time exceeded)
    execution_success = True
    start_time = time.time()         # <== Mark starting time
    if not cache_dir:
        cache_dir = code_dir  # For the moment cached data live next to the code

    # Make the sample code importable: data_io, DataManager and Model are
    # shipped inside the participant submission.
    path.append(code_dir)
    path.append(os.path.join(code_dir, 'sample_code'))
    import data_io
    from data_io import vprint           # print only in verbose mode
    from data_manager import DataManager # load/save data and get info about them
    from model import Model              # example model implementing persistence

    vprint( verbose,  "\n====> STEP: " + str(step_num))
    vprint( verbose,  "Using input_dir: " + input_dir)
    vprint( verbose,  "Using output_dir: " + output_dir)
    vprint( verbose,  "Using code_dir: " + code_dir)
    vprint( verbose,  "Using cache_dir: " + cache_dir)

    # Make a result directory and cache_dir if they do not exist
    data_io.mkdir(output_dir)
    data_io.mkdir(cache_dir)

    # List various directories
    if debug_mode >= 3:
        # BUG FIX: the original concatenated the literal "%d" with
        # str(version), printing e.g. "This code version is %d0.1".
        vprint( verbose,  "This code version is " + str(version))
        data_io.show_version()
        data_io.show_dir(os.getcwd()) # Run directory
        data_io.show_io(input_dir, output_dir)
        data_io.show_dir(output_dir)

    # (Removed a duplicate path.append(code_dir); code_dir is appended above.)

    #### START WORKING ####  ####  ####  ####  ####  ####  ####  ####  ####
    vprint( verbose,  "************************************************")
    vprint( verbose,  "******** Processing data chunk number " + str(step_num) + " ********")
    vprint( verbose,  "************************************************")

    # Instantiate data and model objects
    if cache_data:
        cache_file = os.path.join(cache_dir, "Din.pickle")
    else:
        cache_file = ""
    Din = DataManager(datatype="input", verbose=verbose, cache_file=cache_file)
    Dout = DataManager(datatype="output", verbose=verbose)
    M = Model(hyper_param=(AR_order, I_order, MA_order), path=code_dir, verbose=verbose)

    # Read data training frames and train
    if step_num == 0:
        # First time around: read the training data and fit the model.
        train_data_dir = os.path.join(input_dir, "train")
        Din.loadTrainData(train_data_dir, max_samples=max_samples)
        # The X matrix is the time series, the t vector holds the (optional)
        # time indices.
        M.train(Din.X, Din.t)
    else:
        # Warm start: reload the already trained model and cached data.
        if save_model:
            M.load(path=cache_dir)
        if cache_data:
            Din.reloadData('Din', data_dir=cache_dir, format='pickle')

    # Read additional frames and append them.
    adapt_data_dir = os.path.join(input_dir, "adapt")
    Din.appendSamples(step_num, adapt_data_dir)

    # Save data for future re-use (we do not forget anything at the moment,
    # but this may be wasteful in time and memory). We especially may not need
    # the training data.
    if cache_data:
        Din.saveData('Din', data_dir=cache_dir, format='pickle')

    # Adapt the model. We pass all the data we have; the model is supposed to
    # know how to use a window of data in the past.
    M.adapt(Din.X, Din.t)
    # To save the effort of re-computing predictions made by the old model to
    # correct it we could re-load past predictions (still available in the
    # output directory). For simplicity we do not do it here.

    # Eventually save the model for future re-use (warm start)
    if save_model:
        M.save(path=cache_dir)

    # Make predictions
    Dout.X = M.predict(Din.X, num_predicted_frames=num_predicted_frames)
    # Idiom: np.arange replaces np.array(range(...)); 1-based frame indices.
    Dout.t = np.arange(1, Dout.X.shape[0] + 1)

    # Save predictions
    Dout.saveData('Y' + str(step_num), data_dir=output_dir, format="h5")

    time_spent = time.time() - start_time
    time_left_over = time_budget - time_spent
    if time_left_over > 0:
        vprint( verbose,  "[+] Done")
        vprint( verbose,  "[+] Time spent %5.2f sec " % time_spent + "::  Time budget %5.2f sec" % time_budget)
    else:
        # False (was 0) for consistency with the True assignment above;
        # backward compatible since False == 0.
        execution_success = False
        vprint( verbose,  "[-] Time exceeded")
        vprint( verbose,  "[-] Time spent %5.2f sec " % time_spent + " > Time budget %5.2f sec" % time_budget)

    return execution_success