def build_model():
    # generate dummy dataset
    def importdict(filename):  #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv',
                         names=['systemtime', 'Var1', 'var2'],
                         sep=';',
                         parse_dates=[0])  #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values())  #export the data frame rows to a list of dicts (list() keeps integer indexing working on Python 3)
        return fileDATES  #return the list to work with it outside the function

    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append(fileDATES[i]['systemtime'].split(",")[2])  #append only the time field into the list
    #load_data = genfromtxt('.\\clustering.csv', delimiter=',')[1:5185,-3]

    data = genfromtxt('.\\clustering.csv', delimiter=',')  #read the csv once and slice out the feature columns
    CarsSpeed = data[1:, -3]
    CarsTotal = data[1:, 4]
    hol = data[1:, -7]

    #filter data in time range 6.00am to 9.00am

    #get speed from 6.00 am to 9.00 am
    speed = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i += 1
    speed = np.array(speed)

    #get number of cars from 6.00 am to 9.00 am
    num_car = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i += 1
    num_car = np.array(num_car)

    #get holiday data from 6.00 am to 9.00 am
    holiday = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i += 1
    holiday = np.array(holiday)

    #combine speed and number of car into dataset 2d array
    #get dataset = [speed,num_car,holiday]
    dataset = np.array([[]])
    for i in range(0, len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer, round(speed[i]))
        buffer = np.append(buffer, round(num_car[i]))
        buffer = np.append(buffer, round(holiday[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = (np.asarray(dataset, 'float32'))
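    # A vectorized equivalent of the row-by-row build above (sketch; assumes
    # speed, num_car and holiday are equal-length 1-D arrays):
    #dataset = np.column_stack((np.round(speed), np.round(num_car), np.round(holiday))).astype('float32')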

    rescale = np.array([[]])
    for i in range(0, dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer, (dataset[i, 0] - np.min(speed)) /
                           (np.max(speed) - np.min(speed)))
        buffer = np.append(buffer, (dataset[i, 1] - np.min(num_car)) /
                           (np.max(num_car) - np.min(num_car)))
        buffer = np.append(buffer, (dataset[i, 2] - np.min(holiday)) /
                           (np.max(holiday) - np.min(holiday)))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale, buffer2))
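    # Vectorized min-max scaling (sketch; equivalent to the loop above):
    #mins = np.array([np.min(speed), np.min(num_car), np.min(holiday)])
    #maxs = np.array([np.max(speed), np.max(num_car), np.max(holiday)])
    #rescale = (dataset - mins) / (maxs - mins)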

    X_train = rescale[:-1]
    X_test = X_train
    Y_train = rescale[1:]
    Y_test = dataset[1:]
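    # note: X_test reuses the training inputs, so the accuracy computed below
    # measures in-sample reconstruction rather than held-out generalization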
    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim,
                input_dim=input_dim,
                init=glorot_uniform_sigm,
                activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps,
                persistent=True,
                batch_size=batch_size,
                dropout=0.0)

    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()

    train_model.add(rbm)
    #train_model.add(Dense(1, activation='sigmoid'))

    #train_model.summary()
    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train,
                    Y_train,
                    batch_size,
                    nb_epoch,
                    verbose=1,
                    shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')

    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    #inference_model.add(Dense(6, input_dim = 2, activation='relu'))
    inference_model.add(h_given_x)
    #inference_model.add(Dense(8, activation='relu'))
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(X_test)
    print(h)

    #convert result to real speed
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * (np.max(speed) - np.min(speed)) + np.min(speed)))  #transform each predicted value back into a speed value
    speed_result = np.array(speed_result)
    print(speed_result)
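    # A vectorized equivalent of the conversion loop above (sketch):
    #speed_result = np.rint(h[:, 0] * (np.max(speed) - np.min(speed)) + np.min(speed))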

    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    check_count = 0
    for i in range(0, speed_result.shape[0]):
        #check = abs(Y_test[i] - h[i]) > abs(threshold[i])
        check = abs(Y_test[i, 0] - speed_result[i]) > abs(threshold)
        if check:
            check_count += 1
            #print("error predict: pred {0} truth {1} threshold {2}" .format(h[i],Y_test[i],abs(threshold[i])))
            #print("error predict: pred {0} truth {1} threshold {2}" .format(speed_result[i],Y_test[i,0],abs(threshold)))
    accuracy = (float(speed_result.shape[0] - check_count) /
                speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)

    #get user input and predict the next speed
    min_speed = float(np.min(speed))
    max_speed = float(np.max(speed))
    min_numcar = float(np.min(num_car))
    max_numcar = float(np.max(num_car))
    min_hol = float(np.min(holiday))
    max_hol = float(np.max(holiday))

    def get_input():
        input_speed = float(input('Enter speed: '))
        input_numcar = float(input('Enter number of cars: '))
        input_hol = float(input('Enter holiday (0 = no, 1 = yes): '))
        if (input_speed != -1 and input_numcar != -1 and input_hol != -1):
            input_speed = (input_speed - min_speed) / (max_speed - min_speed)
            input_numcar = (input_numcar - min_numcar) / (max_numcar - min_numcar)
            input_hol = (input_hol - min_hol) / (max_hol - min_hol)
            buffer = np.array([[input_speed, input_numcar, input_hol]])
            h = inference_model.predict(buffer)

            result = round(h[0, 0] * (max_speed - min_speed) + min_speed)
            print('Next Speed is ' + str(result))
            return 1
        else:
            return -1

    out = 1
    while (out != -1):
        print("Enter speed = -1 , number of car = -1 and holiday = -1 to exit")
        out = get_input()
Example #2
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # standardize (in this case superfluous)
    #dataset, mean, stddev = standardize(dataset)

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]    # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]     # last 1000 samples for testing
    X_trainsub = dataset[:ntest, :]  # subset of training data with the same number of samples as the test set
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(input_dim=input_dim, hidden_dim=hidden_dim, init=glorot_uniform_sigm)
    rbm.srng = RandomStreams(seed=srng_seed)
    train_model = SingleLayerUnsupervised()
    train_model.add(rbm)

    # setup optimizer, loss
    momentum_schedule = make_stepped_schedule([(0, 0.5), (5, 0.9)])
    momentum_scheduler = MomentumScheduler(momentum_schedule)
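    # the stepped schedule above presumably holds momentum at 0.5 for the first
    # 5 epochs and raises it to 0.9 from epoch 5 onward (reading the
    # (epoch, value) pairs; exact semantics depend on make_stepped_schedule)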

    opt = SGD(lr, 0., decay=0.0, nesterov=False)

    contrastive_divergence = rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=contrastive_divergence)

    # additional monitors
    #rec_loss = rbm.reconstruction_loss(nb_gibbs_steps=1)
    #rec_err_logger = UnsupervisedLoss1Logger(X_train, loss=rec_loss, label='  - input reconstruction loss', every_n_epochs=1)
    #rec_err_logger.compile()

    #free_energy_gap = rbm.free_energy_gap
    #free_energy_gap_logger = UnsupervisedLoss2Logger(X_trainsub, X_test, loss=free_energy_gap, label='  - free energy gap', every_n_epochs=1)
    #free_energy_gap_logger.compile()

    # do training
    print('Training...')
    begin_time = time.time()

    #callbacks = [momentum_scheduler, rec_err_logger, free_energy_gap_logger]
    callbacks = [momentum_scheduler]
    train_model.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False, callbacks=callbacks)

    end_time = time.time()

    print('Training took %f minutes' % ((end_time - begin_time)/60.0))

    # save model parameters
    print('Saving model...')
    rbm.save_weights('example.hdf5', overwrite=True)

    # load model parameters
    print('Loading model...')
    rbm.load_weights('example.hdf5')

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer()
    inference_model = Sequential([h_given_x, SampleBernoulli(mode='maximum_likelihood')])

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error') # XXX: optimizer and loss are not used!

    print('Doing inference...')
    h = inference_model.predict(dataset)

    print(h)

    print('Done!')
Example #3
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim),
                               scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]  # last 1000 samples for testing
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least the size of the test set!'

    # setup model structure
    print('Creating training model...')
    rbm1 = GBRBM(hidden_dim[0],
                 input_dim=input_dim,
                 init=glorot_uniform_sigm,
                 activation='relu',
                 nb_gibbs_steps=nb_gibbs_steps,
                 persistent=True,
                 batch_size=batch_size,
                 dropout=dropouts[0])
    rbm2 = RBM(hidden_dim[1],
               input_dim=hidden_dim[0],
               init=glorot_uniform_sigm,
               activation='relu',
               nb_gibbs_steps=nb_gibbs_steps,
               persistent=True,
               batch_size=batch_size,
               dropout=dropouts[1])
    rbm3 = RBM(hidden_dim[2],
               input_dim=hidden_dim[1],
               init=glorot_uniform_sigm,
               activation='relu',
               nb_gibbs_steps=nb_gibbs_steps,
               persistent=True,
               batch_size=batch_size,
               dropout=dropouts[2])
    rbms = [rbm1, rbm2, rbm3]
    dbn = DBN(rbms, hidden_unit_type='binary')
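    # the stack chains dimensions: input_dim -> hidden_dim[0] -> hidden_dim[1]
    # -> hidden_dim[2]; each RBM's visible layer is the hidden layer of the RBM below it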

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    metrics = []
    for rbm in rbms:
        metrics.append([rbm.reconstruction_loss])
    dbn.compile(layer_optimizer=get_layer_optimizer,
                layer_loss=get_layer_loss,
                metrics=metrics)

    # do training
    print('Training...')

    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)

    print(h)

    print('Done!')
Example #4
def build_model():
    # generate dummy dataset
    def importdict(filename):  #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv', names=['systemtime', 'Var1', 'var2'], sep=';', parse_dates=[0])  #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values())  #export the data frame rows to a list of dicts
        return fileDATES  #return the list to work with it outside the function
    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1,len(fileDATES)):
        timebuffer.append(fileDATES[i]['systemtime'].split(",")[2]) #append only the time field into the list
    #load_data = genfromtxt('.\\clustering.csv', delimiter=',')[1:5185,-3]
    
    data = genfromtxt('.\\clustering.csv', delimiter=',') #read the csv once and slice out the feature columns
    CarsSpeed = data[1:,-3]
    CarsTotal = data[1:,4]
    hol = data[1:,-7]
    #week_read = data[1:,-8]
    
    
    
    #filter data in time range 6.00am to 9.00am
    
    #get speed from 6.00 am to 9.00 am
    speed = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i+=1
    speed = np.array(speed)
    
    
    #get number of cars from 6.00 am to 9.00 am
    num_car = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i+=1
    num_car = np.array(num_car)
    
    #get holiday data from 6.00 am to 9.00 am
    holiday = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i+=1
    holiday = np.array(holiday)
    
    '''
    #get holiday data since 6.00 am to 9.00 am
    week = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                week.append(week_read[i])
                i+=1
    week = np.array(week)
    '''
    #combine speed and number of car into dataset 2d array
    #get dataset = [speed,num_car,holiday]
    dataset = np.array([[]])
    for i in range(0,len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer,round(speed[i]))
        buffer = np.append(buffer,round(num_car[i]))
        buffer = np.append(buffer,round(holiday[i]))
        #buffer = np.append(buffer,round(week[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset,buffer2))
    dataset = (np.asarray(dataset, 'float32'))
    
    '''
    rescale = np.array([[]])
    for i in range(0,dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer,(dataset[i,0] - np.min(speed)) / (np.max(speed)-np.min(speed)))
        buffer = np.append(buffer,(dataset[i,1] - np.min(num_car)) / (np.max(num_car)-np.min(num_car)))
        buffer = np.append(buffer,(dataset[i,2] - np.min(holiday)) / (np.max(holiday)-np.min(holiday)))
        #buffer = np.append(buffer,(dataset[i,3] - np.min(week)) / (np.max(week)-np.min(week)))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale,buffer2))
    '''        
    rescale = np.array([[]])
    for i in range(0,dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer,(dataset[i,0] - np.mean(speed)) / (np.std(speed)))
        buffer = np.append(buffer,(dataset[i,1] - np.mean(num_car)) / (np.std(num_car)))
        buffer = np.append(buffer,(dataset[i,2] - np.mean(holiday)) / (np.std(holiday)))
        #buffer = np.append(buffer,(dataset[i,3] - np.mean(week)) / (np.std(week)))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale,buffer2))
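    # Vectorized z-score standardization (sketch; equivalent to the loop above):
    #means = np.array([np.mean(speed), np.mean(num_car), np.mean(holiday)])
    #stds = np.array([np.std(speed), np.std(num_car), np.std(holiday)])
    #rescale = (dataset - means) / stds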
    
    train_ratio = 0.75
    divider = int(round(train_ratio*rescale.shape[0]))
    
    pred_minutes = 10
    
    #divide data into train and test
    X_train = rescale[:divider-int(pred_minutes/5)]
    X_test = rescale[divider:-int(pred_minutes/5)]
    Y_train = rescale[int(pred_minutes/5):divider]
    Y_test = dataset[divider+int(pred_minutes/5):]
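    # the time stamps step in 5-minute intervals, so int(pred_minutes/5) = 2 rows
    # of lead time here: each X row at time t is paired with the Y row at t + 10 minutes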
     
    
    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim,
                init=glorot_uniform_sigm,
                activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps,
                persistent=True,
                batch_size=batch_size,
                dropout=0.0)
    
    rbm.srng = RandomStreams(seed=srng_seed)
    
    train_model = Sequential()
    
    train_model.add(rbm)
    #train_model.add(Dense(1, activation='sigmoid'))
    
    
    #train_model.summary()
    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]
    
    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)
     
    # do training
    print('Training...')    
    train_model.fit(X_train, Y_train, batch_size, nb_epoch,
                    verbose=1, shuffle=False)
    
    
    
    # generate hidden features from input data
    print('Creating inference model...')
    
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)
    
    inference_model = Sequential()
    #inference_model.add(Dense(6, input_dim = 2, activation='relu'))
    inference_model.add(h_given_x)
    #inference_model.add(Dense(8, activation='relu'))
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))
    
    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')
    
    print('Doing inference...')
    h = inference_model.predict(X_test)
    print(h)
    
    '''
    #convert result to real speed
    speed_result = []
    for i in range(0,len(h)):
        speed_result.append(round((h[i,0]*(np.max(speed)-np.min(speed)) + np.min(speed)))) #transfrom all predicted value into speed value
    speed_result = np.array(speed_result)
    print(speed_result)
    '''
    
    #convert result to real speed
    speed_result = []
    for i in range(0,len(h)):
        speed_result.append(round(h[i,0]*(np.std(speed)) + np.mean(speed))) #transform each predicted value back into a speed value
    speed_result = np.array(speed_result)
    print(speed_result)
       
    
    '''
    
    range_sd = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-5]
    check_count = 0
    ##############################################
    for i in range(0,len(timebuffer)-1):
        sd = []
        j=0
        if timebuffer[i] == '6:00': #This is 1 day data
            while timebuffer[i+j] != '9:05':
                print(i)
                sd.append(abs(range_sd[i+j]))
                j+=1
            sd = np.array(sd)
            threshold = np.mean(sd)
            threshold = math.ceil(threshold)
            if(threshold <= 5):
                threshold = 5
            #print(threshold)
        for j in range(0,37):
            check = abs(Y_test[i+j,0] - speed_result[i+j]) > abs(threshold)
            if check == True:
                    check_count+=1
    accuracy = (float(speed_result.shape[0]-check_count)/speed_result.shape[0])*100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)
    
    ##############################################
    '''
    
    
    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    mse_buffer = 0
    mae_buffer = 0
    check_count = 0
    for i in range(0,speed_result.shape[0]):
        check = abs(Y_test[i,0] - speed_result[i]) > abs(threshold)
        mse_buffer += (Y_test[i,0] - speed_result[i])*(Y_test[i,0] - speed_result[i])
        mae_buffer += abs(Y_test[i,0] - speed_result[i])
        if check:
            check_count += 1
    accuracy = (float(speed_result.shape[0]-check_count)/speed_result.shape[0])*100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)
    
    
    rmse = math.sqrt(mse_buffer/speed_result.shape[0])  #root mean squared error
    mae = mae_buffer/speed_result.shape[0]  #mean absolute error
    print("RMSE = %.2f" % rmse)
    print("MAE = %.2f" % mae)
    
    print('Done!')
    
    
    
    less_than_5 = 0
    less = 0
    more = 0
    more_than_5 = 0
    equal = 0
    for i in range(0, speed_result.shape[0]):
        diff = speed_result[i] - Y_test[i, 0]
        if diff < -threshold:
            less_than_5 += 1
        elif diff < 0:
            less += 1
        elif diff == 0:
            equal += 1
        elif diff <= threshold:
            more += 1
        else:
            more_than_5 += 1
    
    less_than_5 = (float(less_than_5)/speed_result.shape[0])*100
    less = (float(less)/speed_result.shape[0])*100
    equal = (float(equal)/speed_result.shape[0])*100
    more = (float(more)/speed_result.shape[0])*100
    more_than_5 = (float(more_than_5)/speed_result.shape[0])*100
    
    print("outbound lower = %.2f %% " % less_than_5)
    print("lower = %.2f %%" % less)
    print("equal = %.2f %%" % equal)
    print("higher = %.2f %%" % more)
    print("outbound higher = %.2f %%" % more_than_5)
    
    
    print('Done!')
    
    
    
    
    with open('speed.csv', 'wb') as f:
         writer = csv.writer(f, delimiter = ',')
         for row in Y_test:
             writer.writerow([row[0]])    
    with open('result.csv', 'wb') as f:
         writer = csv.writer(f, delimiter = ',')
         for row in speed_result:
             writer.writerow([row])        
    
    
    
    
    
    
    
Example #5
def main():
    # generate dummy dataset
    def importdict(filename):  #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv',
                         names=['systemtime', 'Var1', 'var2'],
                         sep=';',
                         parse_dates=[0])  #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values())  #export the data frame rows to a list of dicts (list() keeps integer indexing working on Python 3)
        return fileDATES  #return the list to work with it outside the function

    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append(fileDATES[i]['systemtime'].split(",")[2])  #append only the time field into the list
    load_data = genfromtxt(
        r'C:\Users\oob13\Desktop\Internship\TrafficFlowPrediction\keras_extensions-master\examples\clustering.csv',
        delimiter=',')[1:5185, -3]
    #filter data in time range 6.00am to 9.00am
    speed = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(load_data[i])
                i += 1
    speed = np.array(speed)

    #generate 2d array
    dataset = np.array([[]])
    for i in range(0, len(speed), 1):
        buffer = np.array([])
        for j in range(0, 1):
            buffer = np.append(buffer, round(speed[i + j]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = (np.asarray(dataset, 'float32'))[:-1]
    #transform dataset to 0-1 values
    rescale = (dataset - np.min(speed)) / (np.max(speed) - np.min(speed))

    #dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # split into train and test portion
    #ntest   = int(0.1*len(speed))
    #X_train = rescale[:-ntest :]     # all but last 1000 samples for training

    #X_test  = rescale[-ntest:, :]    # last 1000 samples for testing
    #Y_train = rescale[:-ntest :]
    '''
    X_train = dataset[:-1]
    X_test = X_train
    Y_train = dataset[1:]
    Y_test = Y_train
    '''
    X_train = rescale[:-1]
    X_test = X_train
    Y_train = rescale[1:]
    Y_test = dataset[1:]
    '''
    X_train = rescale[:-1]
    X_test = X_train
    Y_train = dataset[1:]
    Y_test = Y_train
    '''
    '''
    # split into train and test portion
    ntest   = int(0.1*len(speed))
    X_train = rescale[:-ntest]
    X_train = X_train[:-1]
    Y_train = dataset[:-ntest]
    Y_train = Y_train[1:]
    
    X_test = rescale[-ntest:]
    X_test = X_test[:-1]
    Y_test = dataset[-ntest:]
    Y_test = Y_test[1:]
    '''

    #assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'
    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim,
                input_dim=input_dim,
                init=glorot_uniform_sigm,
                activation='relu',
                nb_gibbs_steps=nb_gibbs_steps,
                persistent=True,
                batch_size=batch_size,
                dropout=0.0)

    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)
    train_model.summary()
    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    #train_model.fit(X_train, Y_train, batch_size, nb_epoch, verbose=1, shuffle=False)
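    # note: with the pre-training fit above commented out, the RBM weights stay
    # at their initialization; the inference model below is instead trained
    # directly against Y_train with an MSE objective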

    # generate hidden features from input data
    print('Creating inference model...')

    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    inference_model.add(h_given_x)
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')
    inference_model.fit(X_train, Y_train, batch_size, nb_epoch, shuffle=False)

    print('Doing inference...')
    h = inference_model.predict(X_test)
    showtomygod = h
    with open("show.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(showtomygod[0:100, :])
    '''
    for i in range(0,len(dataset)):
        if dataset[i] == round(np.average(dataset)):
            base = dataset[i]
    itemindex = np.where(dataset==base)[0][0]
    
    base_transform = h[itemindex-1]
    float_base_transform = float(base_transform)
    
    diff_ratio = (h[1]-h[0])/(dataset[2]-dataset[1])
    for i in range(0,len(h)) :
        h[i] = round(((h[i]-float_base_transform)/diff_ratio) + np.average(dataset))
    '''

    print(dataset)

    print(h)

    #save to csv
    print('Done!')
    with open("dataset.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(dataset[0:100, :])
    with open("houtput.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(h[0:100, :])
    with open("rescale.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(rescale[0:100, :])

    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('C:\Users\oob13\Desktop\Internship\TrafficFlowPrediction\keras_extensions-master\examples\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    check_count = 0
    for i in range(0, len(h)):
        #check = abs(Y_test[i] - h[i]) > abs(threshold[i])
        check = abs(Y_test[i] - h[i]) > abs(threshold)  #assumes hidden_dim == 1 so h[i] holds a single value
        if check:
            check_count += 1
            #print("error predict: pred {0} truth {1} threshold {2}" .format(h[i],Y_test[i],abs(threshold[i])))
            print("error predict: pred {0} truth {1} threshold {2}".format(
                h[i], Y_test[i], abs(threshold)))
    accuracy = (float(h.shape[0] - check_count) / h.shape[0]) * 100
    print("RBM Accuracy = %.2f %%" % accuracy)
Example #6
def main():
    #grab input data set and set up dataset here
    X_train = []
    X_test = []
    print('Creating training model')
    #start with a GBRBM followed by 5 more RBMs for 5*2 = 10 hidden layers
    dbn = DBN([
        GBRBM(input_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm)
    ])

    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    #Train
    #train off token vectors from early version of software
    print('Training')
    begin_time = time.time()

    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    #save model parameters from training
    print('Saving model')
    dbn.save_weights('dbn_weights.hdf5', overwrite=True)

    #load model  from save
    print('Loading model')
    dbn.load_weights('dbn_weights.hdf5')

    #generate hidden features from input data
    print('Creating inference model')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    #last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference')
    h = inference_model.predict(X_test)
Example #7
def build_model():
    # generate dummy dataset
    def importdict(filename):  #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv',
                         names=['systemtime', 'Var1', 'var2'],
                         sep=';',
                         parse_dates=[0])  #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values())  #export the data frame rows to a list of dicts (list() keeps integer indexing working on Python 3)
        return fileDATES  #return the list to work with it outside the function

    fileDATES = importdict('.\\clustering2')
    #get time and keep it in list
    #use time to filter data later
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append(fileDATES[i]['systemtime'].split(",")[2])  #append only the time field into the list

    timebuffer = timebuffer[0:7499]
    #load any features
    CarsSpeed = genfromtxt('.\\clustering2.csv', delimiter=',')[1:7500, -3]
    #CarsTotal = genfromtxt('C:\Users\oob13\Desktop\Internship\TrafficFlowPrediction\clustering2.csv', delimiter=',')[1:,4]
    #hol = genfromtxt('C:\Users\oob13\Desktop\Internship\TrafficFlowPrediction\clustering2.csv', delimiter=',')[1:,-7]

    #use all speed
    speed = np.array(CarsSpeed)
    '''
    #filter data in time range 6.00am to 9.00am
    #open comment depends on feature used
    #get speed since 6.00 am to 9.00 am
    speed = []
    for i in range(0,len(timebuffer)):
        #print(timebuffer[i])
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i+=1
    speed = np.array(speed)
    
    
    #get number of car since 6.00 am to 9.00 am
    num_car = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i+=1
    num_car = np.array(num_car)
    
    #get holiday data since 6.00 am to 9.00 am
    holiday = []
    for i in range(0,len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i+=1
    holiday = np.array(holiday)

    '''
    #combine speed and number of car into dataset 2d array
    #get dataset = [100] -> [[100]]
    dataset = np.array([[]])
    for i in range(0, len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer, (speed[i]))
        #buffer = np.append(buffer,round(num_car[i]))
        #buffer = np.append(buffer,round(holiday[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = (np.asarray(dataset, 'float32'))

    #rescale real speed to standardized values and keep them in the "buffer" variable
    #one day has 288 data points
    buffer = np.array([])
    for i in range(0, len(speed) - 287, 287):  #note: stride 287 with 288-sample windows makes consecutive days overlap by one sample
        buffer_speed = []
        #start in one day
        for j in range(i, i + 288):
            buffer_speed.append(speed[j])
        buffer_speed = np.array(buffer_speed)
        #get mean and std in one day
        mean_day = np.mean(buffer_speed)
        std_day = np.std(buffer_speed)
        #rescale by equation: z = (x - mean(x))/std(x)
        for k in range(i, i + 288):
            regular = (speed[k] - mean_day) / (std_day)
            buffer = np.append(buffer, [regular])
    rescale = buffer
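    # Vectorized per-day standardization (sketch; assumes non-overlapping
    # 288-sample days were intended, unlike the stride-287 loop above, and
    # keeps the result 1-D to match the current behaviour):
    #n_days = len(speed) // 288
    #daily = speed[:n_days * 288].reshape(n_days, 288)
    #rescale = ((daily - daily.mean(axis=1, keepdims=True)) / daily.std(axis=1, keepdims=True)).ravel()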
    '''
    #change data from 1d array to 2d array
    #get [0.03] -> [[0.03]]
    rescale = np.array([[]])
    for i in range(0,len(speed)):
        buffer2 = np.array([])
        buffer2 = np.append(buffer2,(buffer[i]))
        buffer2 = np.array([buffer2])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale,buffer2))
    '''
    #divide training data and testing data
    train_ratio = 0.75
    divider = int(round(train_ratio * rescale.shape[0]))

    #set future minutes for predicting
    pred_minutes = 5

    #divide data into train and test
    X_train = rescale[:divider - int(pred_minutes / 5)]
    X_test = rescale[divider:-int(pred_minutes / 5)]
    Y_train = dataset[int(pred_minutes / 5):divider]
    Y_test = dataset[divider + int(pred_minutes / 5):]
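    # the time stamps step in 5-minute intervals, so int(pred_minutes / 5) = 1
    # row of lead time here: each X row at time t is paired with the
    # ground-truth speed at t + 5 minutes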

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim,
                input_dim=input_dim,
                init=glorot_uniform_sigm,
                activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps,
                persistent=True,
                batch_size=batch_size,
                dropout=0.0)

    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()

    train_model.add(rbm)

    #set optimizer as stochastic gradient descent
    #set loss function as contrastive divergence loss
    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train,
                    Y_train,
                    batch_size,
                    nb_epoch,
                    verbose=1,
                    shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    #add output layer to model
    inference_model = Sequential()
    inference_model.add(h_given_x)

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    #predicting result
    #get 0-1 values
    print('Doing inference...')
    h = inference_model.predict(X_test)
    #print(h)

    #convert result to real speed
    #use the inverse of the same equation
    #speed_result var is the predicted speed after values are transformed
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * np.std(speed) + np.mean(speed)))  #transform each predicted value back into a speed value
    speed_result = np.array(speed_result)
    #print(speed_result)

    #Evaluation part
    #find accuracy of model by using threshold
    #set speed difference threshold
    threshold = 5
    check_count = 0
    for i in range(0, speed_result.shape[0]):
        check = abs(Y_test[i, 0] - speed_result[i]) > abs(threshold)
        if check:
            check_count += 1
    accuracy = (float(speed_result.shape[0] - check_count) /
                speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)
    print('Done!')

    #find root mean square error
    def rmse(predictions, targets):
        return (np.sqrt(((predictions - targets)**2).mean()))

    print("MSE")
    print(rmse(speed_result, Y_test[:, 0]))

    #find mean absolute error
    def mae(predictions, targets):
        return ((np.absolute(predictions - targets)).mean())

    print("MAE")
    print(mae(speed_result, Y_test[:, 0]))

    #save ground-truth value and predicted value to csv
    with open('speed.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in Y_test:
            writer.writerow([row[0]])
    with open('result.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in speed_result:
            writer.writerow([row])
Example #8
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim),
                               scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]  # last 1000 samples for testing

    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least the size of the test set!'

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim,
                input_dim=input_dim,
                init=glorot_uniform_sigm,
                activation='relu',
                nb_gibbs_steps=nb_gibbs_steps,
                persistent=True,
                batch_size=batch_size,
                dropout=0.0)

    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train,
                    X_train,  # the input doubles as the target: the RBM learns to reconstruct it
                    batch_size,
                    nb_epoch,
                    verbose=1,
                    shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')

    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    inference_model.add(h_given_x)
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)

    print(h)

    print('Done!')
Example #9
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim),
                               scale=np.ones(input_dim),
                               size=(nframes, input_dim))

    # standardize (in this case superfluous)
    #dataset, mean, stddev = standardize(dataset)

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]  # last 1000 samples for testing
    X_trainsub = dataset[:ntest, :]  # subset of training data with the same number of samples as the test set
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least the size of the test set!'

    # setup model structure
    print('Creating training model...')
    dbn = DBN([
        GBRBM(input_dim, 200, init=glorot_uniform_sigm),
        RBM(200, 400, init=glorot_uniform_sigm),
        RBM(400, 300, init=glorot_uniform_sigm),
        RBM(300, 50, init=glorot_uniform_sigm),
        RBM(50, hidden_dim, init=glorot_uniform_sigm)
    ])

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    # do training
    print('Training...')
    begin_time = time.time()

    #callbacks = [momentum_scheduler, rec_err_logger, free_energy_gap_logger]
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    end_time = time.time()

    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    # save model parameters
    print('Saving model...')
    dbn.save_weights('example.hdf5', overwrite=True)

    # load model parameters
    print('Loading model...')
    dbn.load_weights('example.hdf5')

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')  # XXX: optimizer and loss are not used!

    print('Doing inference...')
    h = inference_model.predict(dataset)

    print(h)

    print('Done!')