def main(make_data_set=True, prediction_lead='60', feature_set='ipw_refl',
         n_estimators=100, file_name='RF_experiment_1.pkl'):
    if make_data_set == 'False':
        make_data_set = False
    else:
        make_data_set = True
    data_builder = BuildDataSet.dataset(num_points=500)
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    train, validation = arrange_training_validation(training_blocks, validation_blocks)
    if make_data_set:
        if prediction_lead == '60':
            print 'Building data set for 60 minute prediction'
            make_dataset_RF(data_builder)
        else:
            print 'Building data set for 30 minute prediction'
            make_dataset_RF_30minPrediction(data_builder)
    performance = RF_classifier(train, validation, n_estimators, feature_set)
    f1 = file('../output/' + str(n_estimators) + '_' + file_name, 'wb')
    pkl.dump(performance, f1, protocol=pkl.HIGHEST_PROTOCOL)
    f1.close()
def call_nowcast_storm():
    # Based on the optimal epoch measured against CSI
    # 2CNN_0maxpool_2048neural_network_p20_special_0_40.pkl
    # 2CNN_0maxpool_2048neural_network_p20_special_1_50.pkl
    # ------------------------------------------------------#
    # 2CNN Network list for IPW + refl
    # network_list = ['2CNN_0maxpool_2048neural_network_p20_special_0_40.pkl',
    #                 '2CNN_0maxpool_2048neural_network_p20_special_1_50.pkl',
    #                 '2CNN_0maxpool_2048neural_network_p20_special_2_140.pkl',
    #                 '2CNN_0maxpool_2048neural_network_p20_special_3_110.pkl']
    # ------------------------------------------------------#
    # 2CNN Network list for refl
    network_list = ['2CNN_0maxpool_2048neural_network_p20_refl_0_79.pkl',
                    '2CNN_0maxpool_2048neural_network_p20_refl_1_99.pkl',
                    '2CNN_0maxpool_2048neural_network_p20_refl_2_39.pkl',
                    '2CNN_0maxpool_2048neural_network_p20_refl_3_19.pkl']
    # ------------------------------------------------------#
    data_builder = BuildDataSet.dataset(num_points=500)
    base_path = '../output/thesis_results/'
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    for network_file in network_list:
        network_path = base_path + network_file
        temp_file = network_path.split('/')[-1]
        val_set = int(re.findall('\d+', temp_file)[-2])
        print 'Running Validation Set %d' % val_set
        prediction_list = nowcast_storm(data_builder, validation_blocks[val_set], network_path)
        f1 = file('2CNN_prediction_file_refl_' + str(val_set) + '.pkl', 'wb')
        pkl.dump(prediction_list, f1, protocol=pkl.HIGHEST_PROTOCOL)
        f1.close()
def main(make_data_set=False, prediction_lead='30', feature_set='ipw_refl',
         n_estimators=300, file_name='RF_experiment_1.pkl', max_depth=6,
         max_features=None):
    print 'Saving file to ' + '../output/RF_experiments/' + str(n_estimators) + file_name + str(max_features) + '_max_depth' + str(max_depth) + '.pkl'
    if make_data_set == 'False':
        make_data_set = False
    else:
        make_data_set = True
    if max_features == 'None':
        print 'Setting max_features to None!'
        max_features = None
    data_builder = BuildDataSet.dataset(num_points=500)
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    if make_data_set:
        if prediction_lead == '60':
            print 'Building data set for 60 minute prediction'
            make_dataset_RF(data_builder)
        else:
            print 'Building data set for 30 minute prediction'
            make_dataset_RF_30minPrediction(data_builder)
    train, validation = arrange_training_validation(training_blocks, validation_blocks)
    performance = RF_classifier(train, validation, n_estimators, max_depth, feature_set, max_features)
    # calculate the averages here before you pkl
    f1 = file('../output/RF_experiments/' + str(n_estimators) + file_name + str(max_features) + '_max_depth' + str(max_depth) + '.pkl', 'wb')
    pkl.dump(performance, f1, protocol=pkl.HIGHEST_PROTOCOL)
    f1.close()
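# A minimal sketch of the per-fold averaging that the "calculate the averages
# here before you pkl" comment above refers to. It is not part of the original
# script: it assumes `performance` is a list of per-fold result objects that
# expose POD, FAR and CSI attributes (attribute names borrowed from
# calc_metrics below); adapt to whatever RF_classifier actually returns.
def average_fold_metrics(performance):
    n = float(len(performance))
    pod = sum(p.POD for p in performance) / n
    far = sum(p.FAR for p in performance) / n
    csi = sum(p.CSI for p in performance) / n
    return pod, far, csi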
def main():
    print 'Plot reflectivity'
    data_builder = BuildDataSet.dataset(num_points=500)
    yr = 14
    storm_dates = data_builder.load_storm_days(yr)
    for d in storm_dates[:1]:
        for t in range(48):
            plot_reflectivity(t, d[0], yr)
def main(make_data_set=False, prediction_lead='60', feature_set='ipw_refl',
         n_estimators=300, file_name='RF_experiment_1.pkl'):
    print 'Saving file to ' + '../output/RF_experiments/' + str(n_estimators) + file_name
    if make_data_set == 'False':
        make_data_set = False
    else:
        make_data_set = True
    data_builder = BuildDataSet.dataset(num_points=500)
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    cca_mdl = fit_CCA(training_blocks[0], data_builder)
    transform_predict_RF(cca_mdl, training_blocks[0], validation_blocks[0], data_builder)
def reflectivity_monthly_distribution():
    '''Plots the distribution for each month in the data set.
    Given an archive of NEXRAD files, find the days where rainfall was
    present to populate our data set, and compute the monthly distribution
    of rainfall over the prediction domain.'''
    DFW = DFWnet.CommonData()
    reflectivity = BuildDataSet.reflectivity_fields()
    # Loop thru all the days in the data set 121 - 144.
    # Download each file, keep the level 3 data and throw away the rest,
    # then check for each day whether there was a storm or not:
    # if there was a storm keep that folder,
    # else delete that folder and its contents.
    order_dict = {14: 'HAS010777764', 15: 'HAS010777767'}
    initial = os.getcwd()
    for yr in [14]:
        for d in range(159, 244):
            DFW.doytodate(int(yr), d)
            # file_to_get = 'NWS_NEXRAD_NXL3_KFWS_20' + DFW.yr + DFW.mon + DFW.day + '000000_20' + DFW.yr + DFW.mon + DFW.day + '235959.tar.gz'
            new_dir = '../data/RadarData/NEXRAD/20' + str(yr) + os.sep + Months[int(DFW.mon) - 1] + DFW.day + os.sep
            if not os.path.exists(new_dir):
                os.mkdir(new_dir)
            os.chdir(new_dir)
            reflectivity.FTPNEXRADfile(DFW.mon, DFW.day, DFW.yr, order_dict[yr])
            os.chdir(initial)
            reflectivity.keepLevel3files(new_dir)
            reflectivity.ConvertToNETCDF(new_dir)
            # Develop a logic here which takes files that have an average greater than
            # 20 dBZ for its rainy days
            file_list = os.listdir(new_dir)
            # define an array with the number of files in that day
            # and a 100x100 grid to hold each time step worth of data
            out_array = np.zeros((len(file_list), 100, 100))
            time_array = []
            for i, fl in enumerate(file_list):
                rad = Dataset(new_dir + fl)
                out_array[i, ...] = reflectivity.reflectivity_polar_to_cartesian(rad)
                time_array.append(rad.time_coverage_end.split('T')[1])
                os.remove(new_dir + fl)
            np.save(new_dir + 'reflectivity_array_' + str(yr) + '_' + str(d) + '.npy', out_array)
            np.save(new_dir + 'time_array_' + str(yr) + '_' + str(d) + '.npy', time_array)
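# A possible sketch of the storm-day filter that the "average greater than
# 20 dBZ" comment above asks for: keep a day if the mean reflectivity over
# all of its scans exceeds the threshold. This helper is hypothetical (the
# name and the NaN handling are assumptions, not the author's implementation);
# it takes the (num_scans, 100, 100) array built above and returns a boolean.
def is_storm_day(out_array, threshold=20.0):
    # NaNs can occur where the polar-to-cartesian mapping has no coverage
    return np.nanmean(out_array) > threshold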
def main():
    data_builder = BuildDataSet.dataset(num_points=500)
    pixels = data_builder.sample_random_pixels()
    base_path = '../data/RadarData/Decimated/'
    file_list = os.listdir(base_path)
    file_list = filter(lambda x: x[-4:] == '.npy', file_list)
    full_data = np.zeros((500, len(file_list), 48))
    pt_ctr = 0
    for x_, y_ in pixels:
        print x_, y_
        for f_ctr, f in enumerate(file_list):
            temp_array = np.load(base_path + f)
            full_data[pt_ctr, f_ctr, :] = temp_array[:, y_, x_]
        pt_ctr += 1
    np.save(base_path + 'full_data_decimated.npy', full_data)
def main(convert_to_images=False):
    data_builder = BuildDataSet.dataset(num_points=500)
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    # make_dataset_NN_2(data_builder)
    if convert_to_images:
        # convert_fields_to_images(data_builder)
        make_dataset_NN(data_builder)
    train, validation = arrange_training_validation(training_blocks, validation_blocks)
    exp_no = 0
    for tr, val in zip(train, validation):
        print 'Train Test Split %d ' % (exp_no + 1)
        neural_network_model_new(tr, val, exp_no + 1)
        # convolution_neural_network_model(train[0],validation[0], exp_no + 1,100)
        # empty_network(tr,val,exp_no+1,1)
        exp_no += 1
def calc_metrics(network_path):
    temp_file = network_path.split('/')[-1]
    data_builder = BuildDataSet.dataset(num_points=500)
    val_set = int(re.findall('\d+', temp_file)[-2])
    # network_params['num_conv_layers'] = num_conv_layers
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    performance = get_frames_points(training_blocks[val_set], validation_blocks[val_set], data_builder, network_path)
    return (performance.POD, performance.FAR, performance.CSI, performance.average_precision)
""" Created on Tue Mar 1 17:00:20 2016 @author: adityanagarajan This script converts the ipw and reflectivit fields from floating point to 8 bit int """ import numpy as np import BuildDataSet from matplotlib import pyplot as plt import matplotlib.cm as cm import DFWnet file_path = 'data/TrainTest/' data_builder = BuildDataSet.dataset(yr = '2014') network = DFWnet.CommonData() map_ipw_array = np.linspace(-5,5,256) map_refl_array = np.linspace(0,90,256) #Trust me and uncomment this code for converting IPW to image #for f in data_builder.IPWfiles: # arr = np.load(file_path + f) # new_array = np.zeros((100,100),dtype='uint8') # for i in range(arr.shape[0]): # for j in range(arr.shape[1]): # new_array[i,j] = np.argmin(np.abs(arr[i,j] - map_ipw_array)) # np.save(file_path + f.split('.')[0] + 'img.npy',new_array) #
network_file_name = '1CNN_1maxpool_neural_network_ipw_refl'
network_file = file('../output/' + network_file_name + '_' + str(exp_no) + '_' + str(ep + 1) + '.pkl', 'wb')
pkl.dump(params, network_file, protocol=pkl.HIGHEST_PROTOCOL)
network_file.close()
f1 = file('../output/performance_metrics_' + network_file_name + str(exp_no) + '.pkl', 'wb')
pkl.dump(performance_metrics, f1, protocol=pkl.HIGHEST_PROTOCOL)
f1.close()


def main(data_builder, make_data_set=False):
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    if make_data_set:
        make_dataset_NN_2(data_builder)
    for i in range(4):
        print '-' * 50
        print 'Validation year = %s Month = %s' % tuple(blocks[i].split('_'))
        conv_net(training_blocks[i], validation_blocks[i], 200, i)


if __name__ == '__main__':
    data_builder = BuildDataSet.dataset(num_points=500)
    main(data_builder)
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 26 18:19:51 2016

@author: adityanagarajan

This file visually verifies the slices
"""

import BuildDataSet
import cPickle
import os
import sys
import numpy as np

data_builder = BuildDataSet.dataset(Threshold='binary')
PixelPoints = data_builder.sample_random_pixels()
#sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)


def build_points_fields():
    num_points = 80
    prev_pt_tr = 0
    for tr_pt in range(0, num_points, 40):
        print 'Building set %d ' % tr_pt
        train_set = data_builder.make_points_frames(PixelPoints[prev_pt_tr:tr_pt + 40])
        print 'Done making training set'
        X_Y_train = data_builder.arrange_frames(train_set)
        temp_file = open('data/dataset2/points_' + str(prev_pt_tr) + '_' + str(tr_pt + 40) + '.pkl', 'wb')
        cPickle.dump((X_Y_train, PixelPoints[prev_pt_tr:tr_pt + 40]), temp_file, protocol=cPickle.HIGHEST_PROTOCOL)
        temp_file.close()
        # training_points.append(X_Y_train,PixelPoints[10:20])
""" Created on Tue Mar 1 17:00:20 2016 @author: adityanagarajan This script converts the ipw and reflectivit fields from floating point to 8 bit int """ import numpy as np import BuildDataSet from matplotlib import pyplot as plt import matplotlib.cm as cm import DFWnet file_path = 'data/TrainTest/' data_builder = BuildDataSet.dataset(yr='2014') network = DFWnet.CommonData() map_ipw_array = np.linspace(-5, 5, 256) map_refl_array = np.linspace(0, 90, 256) #Trust me and uncomment this code for converting IPW to image #for f in data_builder.IPWfiles: # arr = np.load(file_path + f) # new_array = np.zeros((100,100),dtype='uint8') # for i in range(arr.shape[0]): # for j in range(arr.shape[1]): # new_array[i,j] = np.argmin(np.abs(arr[i,j] - map_ipw_array)) # np.save(file_path + f.split('.')[0] + 'img.npy',new_array) # #
def make_predictions(yr=14):
    base_path = '../output/1_CNN_experiments/'
    # /Users/adityanagarajan/projects/nowcaster/output/1_CNN_experiments
    print os.path.exists(base_path + 'CPU_1_CNN_layer_max_pool_0_200.pkl')
    params = np.load(base_path + 'CPU_1_CNN_layer_max_pool_0_200.pkl')
    print len(params)
    input_var = T.tensor4('inputs')
    network, hidden_1 = build_DCNN_maxpool_softmax(input_var)
    lasagne.layers.set_all_param_values(network, params)
    prediction_ = lasagne.layers.get_output(network, deterministic=True)
    predict_function = theano.function([input_var], prediction_)
    data_builder = BuildDataSet.dataset(num_points=1000)
    # This is the range of points in the central domain which can
    # have a 33x33 around it.
    num_points = 4356
    domain_points = (range(17, 83), range(17, 83))
    PixelPoints = [(x, y) for x in domain_points[0] for y in domain_points[1]]
    PixelPoints = np.array(PixelPoints)
    storm_dates_all = data_builder.load_storm_days(yr)
    doy_strings = data_builder.club_days(storm_dates_all)
    days_in_sorted = doy_strings.keys()
    days_in_sorted.sort()
    print doy_strings
    ipw_files, refl_files = data_builder.sort_IPW_refl_files_imgs(yr)
    temp_ipw_files = filter(lambda x: re.findall('\d+', x)[1] in doy_strings['129'], ipw_files)
    temp_refl_files = filter(lambda x: re.findall('\d+', x)[1] in doy_strings['129'], refl_files)
    temp_ipw_files = map(lambda x: '../data/dataset/20' + str(yr) + os.sep + x, temp_ipw_files)
    temp_refl_files = map(lambda x: '../data/dataset/20' + str(yr) + os.sep + x, temp_refl_files)
    # We define an array which contains the ground truth in column 1
    # and the prediction probabilities in columns 2 and 3
    real_predictions = np.zeros((num_points, 91, 3))
    pt_ctr = 0
    for x_, y_ in zip(PixelPoints[:, 0], PixelPoints[:, 1]):
        print 'Predicting for point: (%d,%d)' % (x_, y_)
        temp_array = data_builder.build_features_and_truth_imgs(temp_ipw_files, temp_refl_files, x_, y_)
        ipw_refl_tensors = data_builder.arrange_frames_single(temp_array)
        X_test = ipw_refl_tensors[1]
        Y_test = ipw_refl_tensors[2]
        Y_pred = predict_function(X_test)
        real_predictions[pt_ctr, :, 0] = Y_test.reshape(91, )
        # print Y_test.reshape(91,)
        # print np.argmax(Y_pred,axis=1)
        real_predictions[pt_ctr, :, 1:] = Y_pred
        pt_ctr += 1
    return real_predictions
input_var = T.tensor4('inputs')

# Load the deep neural network module
neural_nets = DCNN_network.DCNN_network()

# Build the deep neural net (see the package in includes/)
network = neural_nets.build_CNN_3_softmax(input_var)

lasagne.layers.set_all_param_values(network, params)

# Build theano function which in turn generates and compiles C code to run the network
prediction_ = lasagne.layers.get_output(network, deterministic=True)
predict_function = theano.function([input_var], prediction_)

# Load the data builder object
data_builder = BuildDataSet.dataset(Threshold=None)

# This is the range of points in the central domain which can have a 33x33 around it.
num_points = 4356
domain_points = (range(17, 83), range(17, 83))

# Arrange all pixel points to a list
PixelPoints = [(x, y) for x in domain_points[0] for y in domain_points[1]]

# Call the function to club consecutive days together
sorted_days = data_builder.club_days()

# Get the end of each consecutive string of days
days_in_sorted = sorted_days.keys()

# Sort the days
    elif yr == 16:
        storm_dates = np.load('../data/storm_dates_2016.npy').astype('int')
    for d in storm_dates:
        print d
        DFW.doytodate(int(yr), d[0])
        new_dir = '../data/RadarData/NEXRAD/20' + str(yr) + os.sep + Months[int(DFW.mon) - 1] + DFW.day
        if not os.path.exists(new_dir):
            os.mkdir(new_dir)
        os.chdir(new_dir)
        GetNEXRADfile(DFW.mon, DFW.day, DFW.yr, order_id=order_dict[yr])
        os.chdir(initial)
        nexrad_files = KeepRequiredFiles(d[0], yr)
        # This call deletes the files which are in between 00 and 30.
        Deletefiles(d[0], yr, nexrad_files)
        ## DFW.doytodate(int(yr),d[0])
        ## file_path = '../data/RadarData/NEXRAD/20' + str(yr) + os.sep + Months[int(DFW.mon) -1] + DFW.day + os.sep
        ## nexrad_files = os.listdir(file_path)
        ConvertToNETCDF(d[0], yr, nexrad_files)
        # check_refl_files(new_dir)
        # decimated = det30minuteDecimated(d[0],yr)
        # averages = get30minuteaverages(d[0],yr)
        # np.save('../data/RadarData/Decimated/' + str(d[1]) + str(d[0]) + 'refl_decimated.npy',decimated)
        # np.save('../data/RadarData/Averages/' + str(d[1]) + str(d[0]) + 'refl_averages.npy',averages)


if __name__ == '__main__':
    yr = [16]
    data_builder = BuildDataSet.dataset(num_points=500)
    pixels = data_builder.sample_random_pixels()
    for y in yr:
        main(y)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import cPickle
import BuildDataSet
import nowcast

average_fields = []
indices_i = [(x * 1089, (x + 1) * 1089) for x in range(4)]
indices_r = [(x * 1089, (x + 1) * 1089) for x in range(4, 8)]

data_builder = BuildDataSet.dataset()

'''
>>> f = file('obj.save', 'rb')
>>> loaded_obj = cPickle.load(f)
>>> f.close()
'''

base_file = 'data/TrainTest/RandomPoints/'
file_list = os.listdir(base_file)
file_list = filter(lambda x: x[-4:] == '.pkl' and x[:3] == 'IPW', file_list)
print file_list

for pkl_file in file_list:
    print pkl_file
network_file = file('../output/' + network_file_name + '_' + str(exp_no) + '_' + str(ep + 1) + '.pkl', 'wb')
pkl.dump(params, network_file, protocol=pkl.HIGHEST_PROTOCOL)
network_file.close()
f1 = file('../output/performance_metrics_' + network_file_name + '_' + str(exp_no) + '.pkl', 'wb')
pkl.dump(performance_metrics, f1, protocol=pkl.HIGHEST_PROTOCOL)
f1.close()


def main(data_builder, make_data_set=False):
    training_blocks, validation_blocks = build_training_validation_sets(data_builder)
    if make_data_set:
        make_dataset_NN_2(data_builder)
    for i in range(4):
        print '-' * 50
        print 'Validation year = %s Month = %s' % tuple(blocks[i].split('_'))
        conv_net(training_blocks[i], validation_blocks[i], 200, i)


if __name__ == '__main__':
    data_builder = BuildDataSet.dataset(Threshold='real_valued', num_points=500)
    main(data_builder)
from matplotlib import pyplot as plt
import random
import cPickle
import nowcast
import BuildDataSet

region_dict = {}
region_dict['region1'] = (range(33, 50), range(50, 66), 'r*')
region_dict['region2'] = (range(50, 66), range(50, 66), 'g*')
region_dict['region3'] = (range(33, 50), range(33, 50), 'b*')
region_dict['region4'] = (range(50, 66), range(33, 50), 'y*')

now_caster = nowcast.BuildNowcaster()
data_builder = BuildDataSet.dataset()
#fill_domain = ([x for x in range(33,66) if x not in range(50,55)],[y for y in range(33,66) if y not in range(50,55)])


def plot_domains(PixelX, PixelY, marker):
    central_chunk = (range(46, 54), range(46, 54))
    # Pull out the noisy regions of the radar
    central_chunk_points = [(x_, y_) for x_ in central_chunk[0] for y_ in central_chunk[1]]
    PixelPoints = [(x, y) for x in PixelX for y in PixelY]
    PixelPoints = [