def build_model():
    # generate dummy dataset
    def importdict(filename): #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv', names=['systemtime', 'Var1', 'var2'], sep=';', parse_dates=[0]) #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values()) #export the data frame to a python dictionary; list() keeps the rows indexable on Python 3 as well
        return fileDATES #return the dictionary to work with it outside the function

    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append((fileDATES[i]['systemtime'].split(","))[2]) #append only time into list #A

    #load_data = genfromtxt('.\\clustering.csv', delimiter=',')[1:5185,-3]
    CarsSpeed = genfromtxt('.\\clustering.csv', delimiter=',')[1:, -3]
    CarsTotal = genfromtxt('.\\clustering.csv', delimiter=',')[1:, 4]
    hol = genfromtxt('.\\clustering.csv', delimiter=',')[1:, -7]

    #filter data in time range 6.00am to 9.00am
    #get speed from 6.00 am to 9.00 am
    speed = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i += 1
    speed = np.array(speed)

    #get number of cars from 6.00 am to 9.00 am
    num_car = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i += 1
    num_car = np.array(num_car)

    #get holiday data from 6.00 am to 9.00 am
    holiday = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i += 1
    holiday = np.array(holiday)

    #combine speed, number of cars and holiday into a 2d dataset array
    #get dataset = [speed, num_car, holiday]
    dataset = np.array([[]])
    for i in range(0, len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer, round(speed[i]))
        buffer = np.append(buffer, round(num_car[i]))
        buffer = np.append(buffer, round(holiday[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = np.asarray(dataset, 'float32')

    #min-max rescale each feature to the 0-1 range
    rescale = np.array([[]])
    for i in range(0, dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer, (dataset[i, 0] - np.min(speed)) / (np.max(speed) - np.min(speed)))
        buffer = np.append(buffer, (dataset[i, 1] - np.min(num_car)) / (np.max(num_car) - np.min(num_car)))
        buffer = np.append(buffer, (dataset[i, 2] - np.min(holiday)) / (np.max(holiday) - np.min(holiday)))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale, buffer2))

    X_train = rescale[:-1]
    X_test = X_train
    Y_train = rescale[1:]
    Y_test = dataset[1:]

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm, activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=0.0)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)
    #train_model.add(Dense(1, activation='sigmoid'))
    #train_model.summary()

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train, Y_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    #inference_model.add(Dense(6, input_dim=2, activation='relu'))
    inference_model.add(h_given_x)
    #inference_model.add(Dense(8, activation='relu'))
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(X_test)
    print(h)

    #convert result to real speed
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * (np.max(speed) - np.min(speed)) + np.min(speed))) #transform all predicted values into speed values
    speed_result = np.array(speed_result)
    print(speed_result)

    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    check_count = 0
    for i in range(0, speed_result.shape[0]):
        #check = abs(Y_test[i] - h[i]) > abs(threshold[i])
        check = abs(Y_test[i, 0] - speed_result[i]) > abs(threshold)
        if check:
            check_count += 1
            #print("error predict: pred {0} truth {1} threshold {2}".format(h[i], Y_test[i], abs(threshold[i])))
            #print("error predict: pred {0} truth {1} threshold {2}".format(speed_result[i], Y_test[i, 0], abs(threshold)))
    accuracy = (float(speed_result.shape[0] - check_count) / speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)

    #get user input and predict the next speed
    min_speed = float(np.min(speed))
    max_speed = float(np.max(speed))
    min_numcar = float(np.min(num_car))
    max_numcar = float(np.max(num_car))
    min_hol = float(np.min(holiday))
    max_hol = float(np.max(holiday))

    def get_input():
        # float() keeps the -1 sentinel comparison working whether input() returns a string or a number
        input_speed = float(input('Enter speed: '))
        input_numcar = float(input('Enter number of cars: '))
        input_hol = float(input('Enter holiday (0 = no, 1 = yes): '))
        if input_speed != -1 and input_numcar != -1 and input_hol != -1:
            input_speed = (input_speed - min_speed) / (max_speed - min_speed)
            input_numcar = (input_numcar - min_numcar) / (max_numcar - min_numcar)
            input_hol = (input_hol - min_hol) / (max_hol - min_hol)
            buffer = np.array([[input_speed, input_numcar, input_hol]])
            h = inference_model.predict(buffer)
            result = round(h[0, 0] * (max_speed - min_speed) + min_speed)
            print('Next Speed is ' + str(result))
            return 1
        else:
            return -1

    out = 1
    while out != -1:
        print("Enter speed = -1, number of cars = -1 and holiday = -1 to exit")
        out = get_input()
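# --- Assumed module preamble (sketch; not part of the original source) ---
# The entry points in this file refer to module-level names that are defined
# elsewhere in the project: csv, math, time, numpy (np), pandas (pd),
# genfromtxt, Sequential, SGD, RandomStreams, and the keras_extensions
# classes RBM, GBRBM, DBN, SampleBernoulli, SingleLayerUnsupervised,
# MomentumScheduler, make_stepped_schedule and glorot_uniform_sigm. Exact
# import paths depend on the keras_extensions version in use, so they are
# only listed here rather than asserted. The hyperparameter values below are
# illustrative placeholders, not the settings used in the original runs.
#
#   import csv, math, time
#   import numpy as np
#   import pandas as pd
#   from numpy import genfromtxt
#   from keras.models import Sequential
#   from keras.optimizers import SGD
#   # plus RBM, GBRBM, DBN, SampleBernoulli, SingleLayerUnsupervised,
#   # glorot_uniform_sigm, MomentumScheduler, make_stepped_schedule and
#   # RandomStreams from keras_extensions / Theano
#
#   input_dim = 3          # e.g. [speed, num_car, holiday]
#   hidden_dim = 8         # the DBN variants use a list of widths instead
#   internal_dim = 100     # hidden width used by the 6-layer DBN variant
#   dropouts = [0.0, 0.0, 0.0]
#   batch_size = 10
#   nb_epoch = 50
#   nb_gibbs_steps = 10
#   lr = 0.001
#   srng_seed = 1234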
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # standardize (in this case superfluous)
    #dataset, mean, stddev = standardize(dataset)

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]    # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]     # last 1000 samples for testing
    X_trainsub = dataset[:ntest, :]  # subset of training data with same number of samples as test set
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(input_dim=input_dim, hidden_dim=hidden_dim, init=glorot_uniform_sigm)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = SingleLayerUnsupervised()
    train_model.add(rbm)

    # setup optimizer, loss
    momentum_schedule = make_stepped_schedule([(0, 0.5), (5, 0.9)])
    momentum_scheduler = MomentumScheduler(momentum_schedule)

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    contrastive_divergence = rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=contrastive_divergence)

    # additional monitors
    #rec_loss = rbm.reconstruction_loss(nb_gibbs_steps=1)
    #rec_err_logger = UnsupervisedLoss1Logger(X_train, loss=rec_loss, label=' - input reconstruction loss', every_n_epochs=1)
    #rec_err_logger.compile()
    #free_energy_gap = rbm.free_energy_gap
    #free_energy_gap_logger = UnsupervisedLoss2Logger(X_trainsub, X_test, loss=free_energy_gap, label=' - free energy gap', every_n_epochs=1)
    #free_energy_gap_logger.compile()

    # do training
    print('Training...')
    begin_time = time.time()
    #callbacks = [momentum_scheduler, rec_err_logger, free_energy_gap_logger]
    callbacks = [momentum_scheduler]
    train_model.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False, callbacks=callbacks)
    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    # save model parameters
    print('Saving model...')
    rbm.save_weights('example.hdf5', overwrite=True)

    # load model parameters
    print('Loading model...')
    rbm.load_weights('example.hdf5')

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer()
    inference_model = Sequential([h_given_x, SampleBernoulli(mode='maximum_likelihood')])

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')  # XXX: optimizer and loss are not used!

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]   # last 1000 samples for testing
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm1 = GBRBM(hidden_dim[0], input_dim=input_dim, init=glorot_uniform_sigm, activation='relu',
                 nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=dropouts[0])
    rbm2 = RBM(hidden_dim[1], input_dim=hidden_dim[0], init=glorot_uniform_sigm, activation='relu',
               nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=dropouts[1])
    rbm3 = RBM(hidden_dim[2], input_dim=hidden_dim[1], init=glorot_uniform_sigm, activation='relu',
               nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=dropouts[2])
    rbms = [rbm1, rbm2, rbm3]
    dbn = DBN(rbms, hidden_unit_type='binary')

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    metrics = []
    for rbm in rbms:
        metrics.append([rbm.reconstruction_loss])

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss, metrics=metrics)

    # do training
    print('Training...')
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
def build_model():
    # generate dummy dataset
    def importdict(filename): #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv', names=['systemtime', 'Var1', 'var2'], sep=';', parse_dates=[0]) #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values()) #export the data frame to a python dictionary; list() keeps the rows indexable on Python 3 as well
        return fileDATES #return the dictionary to work with it outside the function

    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append((fileDATES[i]['systemtime'].split(","))[2]) #append only time into list #A

    #load_data = genfromtxt('.\\clustering.csv', delimiter=',')[1:5185,-3]
    CarsSpeed = genfromtxt('.\\clustering.csv', delimiter=',')[1:, -3]
    CarsTotal = genfromtxt('.\\clustering.csv', delimiter=',')[1:, 4]
    hol = genfromtxt('.\\clustering.csv', delimiter=',')[1:, -7]
    #week_read = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-8]

    #filter data in time range 6.00am to 9.00am
    #get speed from 6.00 am to 9.00 am
    speed = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i += 1
    speed = np.array(speed)

    #get number of cars from 6.00 am to 9.00 am
    num_car = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i += 1
    num_car = np.array(num_car)

    #get holiday data from 6.00 am to 9.00 am
    holiday = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i += 1
    holiday = np.array(holiday)

    '''
    #get day-of-week data from 6.00 am to 9.00 am
    week = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                week.append(week_read[i])
                i += 1
    week = np.array(week)
    '''

    #combine speed, number of cars and holiday into a 2d dataset array
    #get dataset = [speed, num_car, holiday]
    dataset = np.array([[]])
    for i in range(0, len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer, round(speed[i]))
        buffer = np.append(buffer, round(num_car[i]))
        buffer = np.append(buffer, round(holiday[i]))
        #buffer = np.append(buffer, round(week[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = np.asarray(dataset, 'float32')

    '''
    #min-max rescaling to the 0-1 range
    rescale = np.array([[]])
    for i in range(0, dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer, (dataset[i, 0] - np.min(speed)) / (np.max(speed) - np.min(speed)))
        buffer = np.append(buffer, (dataset[i, 1] - np.min(num_car)) / (np.max(num_car) - np.min(num_car)))
        buffer = np.append(buffer, (dataset[i, 2] - np.min(holiday)) / (np.max(holiday) - np.min(holiday)))
        #buffer = np.append(buffer, (dataset[i, 3] - np.min(week)) / (np.max(week) - np.min(week)))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale, buffer2))
    '''

    #z-score rescaling: z = (x - mean(x)) / std(x)
    rescale = np.array([[]])
    for i in range(0, dataset.shape[0]):
        buffer = np.array([])
        buffer = np.append(buffer, (dataset[i, 0] - np.mean(speed)) / np.std(speed))
        buffer = np.append(buffer, (dataset[i, 1] - np.mean(num_car)) / np.std(num_car))
        buffer = np.append(buffer, (dataset[i, 2] - np.mean(holiday)) / np.std(holiday))
        #buffer = np.append(buffer, (dataset[i, 3] - np.mean(week)) / np.std(week))
        buffer2 = np.array([buffer])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale, buffer2))

    train_ratio = 0.75
    divider = int(round(train_ratio * rescale.shape[0]))
    pred_minutes = 10

    #divide data into train and test
    X_train = rescale[:divider - int(pred_minutes / 5)]
    X_test = rescale[divider:-int(pred_minutes / 5)]
    Y_train = rescale[int(pred_minutes / 5):divider]
    Y_test = dataset[divider + int(pred_minutes / 5):]

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm, activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=0.0)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)
    #train_model.add(Dense(1, activation='sigmoid'))
    #train_model.summary()

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train, Y_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    #inference_model.add(Dense(6, input_dim=2, activation='relu'))
    inference_model.add(h_given_x)
    #inference_model.add(Dense(8, activation='relu'))
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(X_test)
    print(h)

    '''
    #convert result to real speed (min-max version)
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * (np.max(speed) - np.min(speed)) + np.min(speed))) #transform all predicted values into speed values
    speed_result = np.array(speed_result)
    print(speed_result)
    '''

    #convert result to real speed
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * np.std(speed) + np.mean(speed))) #transform all predicted values into speed values
    speed_result = np.array(speed_result)
    print(speed_result)

    '''
    range_sd = genfromtxt('.\\clustering.csv', delimiter=',')[1:, -5]
    check_count = 0
    ##############################################
    for i in range(0, len(timebuffer) - 1):
        sd = []
        j = 0
        if timebuffer[i] == '6:00':
            #This is 1 day of data
            while timebuffer[i + j] != '9:05':
                print(i)
                sd.append(abs(range_sd[i + j]))
                j += 1
            sd = np.array(sd)
            threshold = np.mean(sd)
            threshold = math.ceil(threshold)
            if threshold <= 5:
                threshold = 5
            #print(threshold)
            for j in range(0, 37):
                check = abs(Y_test[i + j, 0] - speed_result[i + j]) > abs(threshold)
                if check:
                    check_count += 1
    accuracy = (float(speed_result.shape[0] - check_count) / speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)
    ##############################################
    '''

    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('.\\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    mse_buffer = 0
    mae_buffer = 0
    check_count = 0
    for i in range(0, speed_result.shape[0]):
        check = abs(Y_test[i, 0] - speed_result[i]) > abs(threshold)
        mse_buffer += (Y_test[i, 0] - speed_result[i]) * (Y_test[i, 0] - speed_result[i])
        mae_buffer += abs(Y_test[i, 0] - speed_result[i])
        if check:
            check_count += 1
    accuracy = (float(speed_result.shape[0] - check_count) / speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)

    rmse = math.sqrt(mse_buffer / speed_result.shape[0])
    mae = mae_buffer / speed_result.shape[0]
    print("RMSE = %.2f" % rmse)
    print("MAE = %.2f" % mae)
    print('Done!')

    #count how far predictions fall from the ground truth, as a share of samples
    less_than_5 = 0
    less = 0
    more = 0
    more_than_5 = 0
    equal = 0
    for i in range(0, speed_result.shape[0]):
        if (speed_result[i] - Y_test[i, 0]) < -threshold and (speed_result[i] - Y_test[i, 0]) < 0:
            less_than_5 = less_than_5 + 1
        elif (speed_result[i] - Y_test[i, 0]) >= -threshold and (speed_result[i] - Y_test[i, 0]) < 0:
            less = less + 1
        elif (speed_result[i] - Y_test[i, 0]) == 0:
            equal = equal + 1
        elif (speed_result[i] - Y_test[i, 0]) <= threshold and (speed_result[i] - Y_test[i, 0]) > 0:
            more = more + 1
        elif (speed_result[i] - Y_test[i, 0]) > threshold and (speed_result[i] - Y_test[i, 0]) > 0:
            more_than_5 = more_than_5 + 1
    # float() keeps the percentages correct under Python 2 integer division
    less_than_5 = (float(less_than_5) / speed_result.shape[0]) * 100
    less = (float(less) / speed_result.shape[0]) * 100
    equal = (float(equal) / speed_result.shape[0]) * 100
    more = (float(more) / speed_result.shape[0]) * 100
    more_than_5 = (float(more_than_5) / speed_result.shape[0]) * 100
    print("outbound lower = %.2f %%" % less_than_5)
    print("lower = %.2f %%" % less)
    print("equal = %.2f %%" % equal)
    print("higher = %.2f %%" % more)
    print("outbound higher = %.2f %%" % more_than_5)
    print('Done!')

    #save ground-truth values and predicted values to csv
    with open('speed.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in Y_test:
            writer.writerow([row[0]])
    with open('result.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in speed_result:
            writer.writerow([row])
def main():
    # generate dummy dataset
    def importdict(filename): #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv', names=['systemtime', 'Var1', 'var2'], sep=';', parse_dates=[0]) #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values()) #export the data frame to a python dictionary; list() keeps the rows indexable on Python 3 as well
        return fileDATES #return the dictionary to work with it outside the function

    fileDATES = importdict('clustering')
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append((fileDATES[i]['systemtime'].split(","))[2]) #append only time into list #A

    # raw string so the backslashes in the Windows path are not treated as escapes
    load_data = genfromtxt(r'C:\Users\oob13\Desktop\Internship\TrafficFlowPrediction\keras_extensions-master\examples\clustering.csv',
                           delimiter=',')[1:5185, -3]

    #filter data in time range 6.00am to 9.00am
    speed = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(load_data[i])
                i += 1
    speed = np.array(speed)

    #generate 2d array
    dataset = np.array([[]])
    for i in range(0, len(speed), 1):
        buffer = np.array([])
        for j in range(0, 1):
            buffer = np.append(buffer, round(speed[i + j]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = (np.asarray(dataset, 'float32'))[:-1]

    #transform dataset to 0-1 values
    rescale = (dataset - np.min(speed)) / (np.max(speed) - np.min(speed))
    #dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # split into train and test portion
    #ntest = int(0.1*len(speed))
    #X_train = rescale[:-ntest, :]  # all but last 1000 samples for training
    #X_test = rescale[-ntest:, :]   # last 1000 samples for testing
    #Y_train = rescale[:-ntest, :]
    '''
    X_train = dataset[:-1]
    X_test = X_train
    Y_train = dataset[1:]
    Y_test = Y_train
    '''
    X_train = rescale[:-1]
    X_test = X_train
    Y_train = rescale[1:]
    Y_test = dataset[1:]
    '''
    X_train = rescale[:-1]
    X_test = X_train
    Y_train = dataset[1:]
    Y_test = Y_train
    '''
    '''
    # split into train and test portion
    ntest = int(0.1*len(speed))
    X_train = rescale[:-ntest]
    X_train = X_train[:-1]
    Y_train = dataset[:-ntest]
    Y_train = Y_train[1:]
    X_test = rescale[-ntest:]
    X_test = X_test[:-1]
    Y_test = dataset[-ntest:]
    Y_test = Y_test[1:]
    '''
    #assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm, activation='relu',
                nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=0.0)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)
    train_model.summary()

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    #train_model.fit(X_train, Y_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    inference_model.add(h_given_x)
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')
    inference_model.fit(X_train, Y_train, batch_size, nb_epoch, shuffle=False)

    print('Doing inference...')
    h = inference_model.predict(X_test)

    showtomygod = h
    with open("show.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(showtomygod[0:100, :])

    '''
    for i in range(0, len(dataset)):
        if dataset[i] == round(np.average(dataset)):
            base = dataset[i]
    itemindex = np.where(dataset == base)[0][0]
    base_transform = h[itemindex - 1]
    float_base_transform = float(base_transform)
    diff_ratio = (h[1] - h[0]) / (dataset[2] - dataset[1])
    for i in range(0, len(h)):
        h[i] = round(((h[i] - float_base_transform) / diff_ratio) + np.average(dataset))
    '''

    print(dataset)
    print(h)

    #save to csv
    print('Done!')
    with open("dataset.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(dataset[0:100, :])
    with open("houtput.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(h[0:100, :])
    with open("rescale.csv", "wb") as f:
        writer = csv.writer(f)
        writer.writerows(rescale[0:100, :])

    #Evaluation part
    #set speed difference threshold
    #threshold = genfromtxt('C:\\Users\\oob13\\Desktop\\Internship\\TrafficFlowPrediction\\keras_extensions-master\\examples\\clustering.csv', delimiter=',')[1:,-5]
    threshold = 5
    check_count = 0
    for i in range(0, len(h)):
        #check = abs(Y_test[i] - h[i]) > abs(threshold[i])
        check = abs(Y_test[i] - h[i]) > abs(threshold)
        if check:
            check_count += 1
            #print("error predict: pred {0} truth {1} threshold {2}".format(h[i], Y_test[i], abs(threshold[i])))
            print("error predict: pred {0} truth {1} threshold {2}".format(h[i], Y_test[i], abs(threshold)))
    accuracy = (float(h.shape[0] - check_count) / h.shape[0]) * 100
    print("RBM Accuracy = %.2f %%" % accuracy)
def main():
    #grab input data and set up the dataset here
    X_train = []
    X_test = []

    print('Creating training model')
    #start with a GBRBM and then follow with 5 more RBMs for 5*2 = 10 hidden layers
    dbn = DBN([
        GBRBM(input_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm),
        RBM(internal_dim, internal_dim, init=glorot_uniform_sigm)
    ])

    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    #Train
    #train off token vectors from early version of software
    print('Training')
    begin_time = time.time()
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)
    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    #save model parameters from training
    print('Saving model')
    dbn.save_weights('dbn_weights.hdf5', overwrite=True)

    #load model from save
    print('Loading model')
    dbn.load_weights('dbn_weights.hdf5')

    #generate hidden features from input data
    print('Creating inference model')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()  # matches the method name used by the other DBN scripts
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    #last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference')
    h = inference_model.predict(X_test)
def build_model():
    # generate dummy dataset
    def importdict(filename): #creates a function to read the csv
        #create data frame from csv with pandas module
        df = pd.read_csv(filename + '.csv', names=['systemtime', 'Var1', 'var2'], sep=';', parse_dates=[0]) #or:, infer_datetime_format=True)
        fileDATES = list(df.T.to_dict().values()) #export the data frame to a python dictionary; list() keeps the rows indexable on Python 3 as well
        return fileDATES #return the dictionary to work with it outside the function

    fileDATES = importdict('.\\clustering2')

    #get time and keep it in a list
    #use time to filter data later
    timebuffer = []
    for i in range(1, len(fileDATES)):
        timebuffer.append((fileDATES[i]['systemtime'].split(","))[2]) #append only time into list #A
    timebuffer = timebuffer[0:7499]

    #load any features
    CarsSpeed = genfromtxt('.\\clustering2.csv', delimiter=',')[1:7500, -3]
    #CarsTotal = genfromtxt('C:\\Users\\oob13\\Desktop\\Internship\\TrafficFlowPrediction\\clustering2.csv', delimiter=',')[1:,4]
    #hol = genfromtxt('C:\\Users\\oob13\\Desktop\\Internship\\TrafficFlowPrediction\\clustering2.csv', delimiter=',')[1:,-7]

    #use all speed samples
    speed = np.array(CarsSpeed)

    '''
    #filter data in time range 6.00am to 9.00am
    #open comment depending on features used
    #get speed from 6.00 am to 9.00 am
    speed = []
    for i in range(0, len(timebuffer)):
        #print(timebuffer[i])
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                speed.append(CarsSpeed[i])
                i += 1
    speed = np.array(speed)

    #get number of cars from 6.00 am to 9.00 am
    num_car = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                num_car.append(CarsTotal[i])
                i += 1
    num_car = np.array(num_car)

    #get holiday data from 6.00 am to 9.00 am
    holiday = []
    for i in range(0, len(timebuffer)):
        if timebuffer[i] == '6:00':
            while timebuffer[i] != '9:05':
                holiday.append(hol[i])
                i += 1
    holiday = np.array(holiday)
    '''

    #combine the features into a 2d dataset array
    #get dataset = [100] -> [[100]]
    dataset = np.array([[]])
    for i in range(0, len(speed)):
        buffer = np.array([])
        buffer = np.append(buffer, speed[i])
        #buffer = np.append(buffer, round(num_car[i]))
        #buffer = np.append(buffer, round(holiday[i]))
        buffer2 = np.array([buffer])
        if i == 0:
            dataset = buffer2
        else:
            dataset = np.concatenate((dataset, buffer2))
    dataset = np.asarray(dataset, 'float32')

    #rescale from real speed to standardized (z-score) values and keep them in the "buffer" variable
    #one day has 288 data points
    buffer = np.array([])
    for i in range(0, len(speed) - 287, 288):  # step a full day (288 samples) so rescale stays aligned with dataset
        buffer_speed = []
        #collect one day of data
        for j in range(i, i + 288):
            buffer_speed.append(speed[j])
        buffer_speed = np.array(buffer_speed)
        #get mean and std for that day
        mean_day = np.mean(buffer_speed)
        std_day = np.std(buffer_speed)
        #rescale by the equation: z = (x - mean(x)) / std(x)
        for k in range(i, i + 288):
            regular = (speed[k] - mean_day) / std_day
            buffer = np.append(buffer, [regular])
    rescale = buffer

    '''
    #change data from 1d array to 2d array
    #get [0.03] -> [[0.03]]
    rescale = np.array([[]])
    for i in range(0, len(speed)):
        buffer2 = np.array([])
        buffer2 = np.append(buffer2, buffer[i])
        buffer2 = np.array([buffer2])
        if i == 0:
            rescale = buffer2
        else:
            rescale = np.concatenate((rescale, buffer2))
    '''

    #divide training data and testing data
    train_ratio = 0.75
    divider = int(round(train_ratio * rescale.shape[0]))

    #set how many minutes ahead to predict
    pred_minutes = 5

    #divide data into train and test
    X_train = rescale[:divider - int(pred_minutes / 5)]
    X_test = rescale[divider:-int(pred_minutes / 5)]
    Y_train = dataset[int(pred_minutes / 5):divider]
    Y_test = dataset[divider + int(pred_minutes / 5):]

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm, activation='sigmoid',
                nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=0.0)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)

    #set optimizer to stochastic gradient descent
    #set loss function to the contrastive divergence loss
    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train, Y_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    #add output layer to model
    inference_model = Sequential()
    inference_model.add(h_given_x)

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    #predict results
    #get 0-1 values
    print('Doing inference...')
    h = inference_model.predict(X_test)
    #print(h)

    #convert result to real speed
    #use the inverse of the same rescaling equation
    #speed_result holds the predicted speeds after the values are transformed back
    speed_result = []
    for i in range(0, len(h)):
        speed_result.append(round(h[i, 0] * np.std(speed) + np.mean(speed))) #transform all predicted values into speed values
    speed_result = np.array(speed_result)
    #print(speed_result)

    #Evaluation part
    #find accuracy of the model by using a threshold
    #set speed difference threshold
    threshold = 5
    check_count = 0
    for i in range(0, speed_result.shape[0]):
        check = abs(Y_test[i, 0] - speed_result[i]) > abs(threshold)
        if check:
            check_count += 1
    accuracy = (float(speed_result.shape[0] - check_count) / speed_result.shape[0]) * 100
    print("RBM Prediction Accuracy = %.2f %%" % accuracy)
    print('Done!')

    #find root mean square error
    def rmse(predictions, targets):
        return np.sqrt(((predictions - targets) ** 2).mean())

    print("RMSE")
    print(rmse(speed_result, Y_test[:, 0]))

    #find mean absolute error
    def mae(predictions, targets):
        return (np.absolute(predictions - targets)).mean()

    print("MAE")
    print(mae(speed_result, Y_test[:, 0]))

    #save ground-truth values and predicted values to csv
    with open('speed.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in Y_test:
            writer.writerow([row[0]])
    with open('result.csv', 'wb') as f:
        writer = csv.writer(f, delimiter=',')
        for row in speed_result:
            writer.writerow([row])
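# --- Optional helper (a sketch, not part of the original source) ---
# The threshold accuracy, RMSE and MAE computations above are repeated in
# several of the script variants in this file. The hypothetical helper below
# (evaluate_speed_predictions is an assumed name) computes the same three
# numbers in one place; it only assumes numpy imported as np, as elsewhere.
def evaluate_speed_predictions(predicted, truth, threshold=5):
    """Return (accuracy %, RMSE, MAE) for predicted vs. ground-truth speeds."""
    predicted = np.asarray(predicted, dtype='float64')
    truth = np.asarray(truth, dtype='float64')
    errors = predicted - truth
    # share of predictions within +/- threshold of the ground truth, in percent
    accuracy = 100.0 * np.mean(np.abs(errors) <= threshold)
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))
    return accuracy, rmse, mae

# Example use with the arrays computed in build_model() above:
#   acc, rmse_val, mae_val = evaluate_speed_predictions(speed_result, Y_test[:, 0])
#   print("RBM Prediction Accuracy = %.2f %%" % acc)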
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]  # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]   # last 1000 samples for testing
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    rbm = GBRBM(hidden_dim, input_dim=input_dim, init=glorot_uniform_sigm, activation='relu',
                nb_gibbs_steps=nb_gibbs_steps, persistent=True, batch_size=batch_size, dropout=0.0)
    rbm.srng = RandomStreams(seed=srng_seed)

    train_model = Sequential()
    train_model.add(rbm)

    opt = SGD(lr, 0., decay=0.0, nesterov=False)
    loss = rbm.contrastive_divergence_loss
    metrics = [rbm.reconstruction_loss]

    # compile theano graph
    print('Compiling Theano graph...')
    train_model.compile(optimizer=opt, loss=loss, metrics=metrics)

    # do training
    print('Training...')
    train_model.fit(X_train, X_train, batch_size, nb_epoch, verbose=1, shuffle=False)

    # generate hidden features from input data
    print('Creating inference model...')
    h_given_x = rbm.get_h_given_x_layer(as_initial_layer=True)

    inference_model = Sequential()
    inference_model.add(h_given_x)
    #inference_model.add(SampleBernoulli(mode='maximum_likelihood'))

    print('Compiling Theano graph...')
    inference_model.compile(opt, loss='mean_squared_error')

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
def main():
    # generate dummy dataset
    nframes = 10000
    dataset = np.random.normal(loc=np.zeros(input_dim), scale=np.ones(input_dim), size=(nframes, input_dim))

    # standardize (in this case superfluous)
    #dataset, mean, stddev = standardize(dataset)

    # split into train and test portion
    ntest = 1000
    X_train = dataset[:-ntest, :]    # all but last 1000 samples for training
    X_test = dataset[-ntest:, :]     # last 1000 samples for testing
    X_trainsub = dataset[:ntest, :]  # subset of training data with same number of samples as test set
    assert X_train.shape[0] >= X_test.shape[0], 'Train set should be at least size of test set!'

    # setup model structure
    print('Creating training model...')
    dbn = DBN([
        GBRBM(input_dim, 200, init=glorot_uniform_sigm),
        RBM(200, 400, init=glorot_uniform_sigm),
        RBM(400, 300, init=glorot_uniform_sigm),
        RBM(300, 50, init=glorot_uniform_sigm),
        RBM(50, hidden_dim, init=glorot_uniform_sigm)
    ])

    # setup optimizer, loss
    def get_layer_loss(rbm, layer_no):
        return rbm.contrastive_divergence_loss(nb_gibbs_steps=1)

    def get_layer_optimizer(layer_no):
        return SGD((layer_no + 1) * lr, 0., decay=0.0, nesterov=False)

    dbn.compile(layer_optimizer=get_layer_optimizer, layer_loss=get_layer_loss)

    # do training
    print('Training...')
    begin_time = time.time()
    #callbacks = [momentum_scheduler, rec_err_logger, free_energy_gap_logger]
    dbn.fit(X_train, batch_size, nb_epoch, verbose=1, shuffle=False)
    end_time = time.time()
    print('Training took %f minutes' % ((end_time - begin_time) / 60.0))

    # save model parameters
    print('Saving model...')
    dbn.save_weights('example.hdf5', overwrite=True)

    # load model parameters
    print('Loading model...')
    dbn.load_weights('example.hdf5')

    # generate hidden features from input data
    print('Creating inference model...')
    F = dbn.get_forward_inference_layers()
    B = dbn.get_backward_inference_layers()
    inference_model = Sequential()
    for f in F:
        inference_model.add(f)
        inference_model.add(SampleBernoulli(mode='random'))
    for b in B[:-1]:
        inference_model.add(b)
        inference_model.add(SampleBernoulli(mode='random'))
    # last layer is a gaussian layer
    inference_model.add(B[-1])

    print('Compiling Theano graph...')
    opt = SGD()
    inference_model.compile(opt, loss='mean_squared_error')  # XXX: optimizer and loss are not used!

    print('Doing inference...')
    h = inference_model.predict(dataset)
    print(h)

    print('Done!')
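# --- Hypothetical entry-point guard (assumption; the call site is not shown in this excerpt) ---
# Each script variant above defines main() or build_model() but does not show
# how it is invoked. A typical guard at the bottom of each script would be:
#
#   if __name__ == '__main__':
#       main()   # or build_model(), depending on which variant the file contains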