def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path

    # train_x = np.random.uniform(0, 1, (17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0, 1, (17, 1))
    # print (train_x)
    # train_x = np.array(train_x, dtype='float32')
    # train_y = np.array(train_y, dtype='float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False)

    """ count the number of images in each typhoon sequence """
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)

    """ check whether the number of images equals the number of track records """
    # for subdir in subdir_list:
    #     for subdirs, dirs, files in os.walk(subdir):
    #         for file in files:
    #             # print (file)
    #             [k1, k2] = file.split("-")[:2]
    #             key = "".join((k1, k2))
    #             try:
    #                 mark = track_dict[key]
    #             except KeyError:
    #                 print (file + ' does not have a track value')
    # for k in track_dict.keys():
    #     k2 = k[-6:]  # typhoon number
    #     k1 = k[:-6]
    #     file = k1 + '-' + k2 + '*'
    #     file_path = image_path + k2 + '/' + file
    #     if not os.path.isfile(file_path):
    #         print (file_path, 'not exists')

    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]
        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key, count, image_number_dictionary[key], 'not equal')
                if count == image_number_dictionary[key]:
                    # print (key, count, image_number_dictionary[key], 'equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list, 'not_equal_track_image_list')
    # print (equal_track_image_list, 'equal_track_image_list')
    print(len(equal_track_image_list), 'length of equal track image list')

    # check whether consecutive track records are one hour apart;
    # this holds for both the equal and the not_equal lists
    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif, 'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break

    data_folder_path = config.data_folder_path
    if not os.path.exists(data_folder_path):
        equal_track_image_list = np.array(equal_track_image_list)
        np.random.shuffle(equal_track_image_list)
        equal_track_image_list = list(equal_track_image_list)
        # equal_track_image_list = equal_track_image_list[:2]
        train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))]
        test_folder = equal_track_image_list[int(0.9 * len(equal_track_image_list)):]
        with open(data_folder_path, 'w') as f:
            json.dump({'train_folder': train_folder, 'test_folder': test_folder}, f)
        print('data_folder_path dumped to: ', data_folder_path)
    else:
        with open(data_folder_path, 'r') as f:
            data_folder = json.load(f)
        train_folder = data_folder['train_folder']
        test_folder = data_folder['test_folder']
        print('load data folder from: ', data_folder_path)

    """
    data_path = config.data_path
    if not os.path.exists(data_path):
        train_x = []
        train_y = []
        test_x = []
        test_y = []
        vgg_model = VGG_16('vgg16_weights.h5')
        sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
        for key in test_folder:
            print(key)
            image_folder = image_path + key + '/'
            track_file_path = track_path + key + '.itk'
            dataset_image = prepare_dataset.dataset_2(image_folder)
            print(dataset_image.shape)
            dataset_input = get_fc2(vgg_model, dataset_image)
            dataset_intensity = prepare_dataset.dataset_1(track_file_path)
            dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity, intensity_mean, intensity_std)
            print(dataset_image.shape, 'dataset_image.shape')
            print(dataset_intensity.shape, 'dataset_intensity')
            data_x, data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity, look_back=look_back)
            test_x += data_x
            test_y += data_y
            # print test_y.shape, test_y
        # train_histss = []
        # validation_histss = []
        for key in train_folder:
            print(key)
            image_folder = image_path + key + '/'
            track_file_path = track_path + key + '.itk'
            dataset_image = prepare_dataset.dataset_2(image_folder)
            dataset_input = get_fc2(vgg_model, dataset_image)
            dataset_intensity = prepare_dataset.dataset_1(track_file_path)
            dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity, intensity_mean, intensity_std)
            print(dataset_image.shape, 'dataset_image.shape')
            print(dataset_intensity.shape, 'dataset_intensity')
            data_x, data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity, look_back=look_back)
            # print (len(data_x))
            train_x += data_x
            train_y += data_y
            data_x = np.array(data_x)
            data_y = np.array(data_y)
            # print (data_x.shape, data_y.shape, 'data_x,data_y')
            # train_hists = []
            # validation_hists = []
            # for i in range(20):
            #     print('start train')
            #     hist = model.fit(data_x, data_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False)
            #     model.reset_states()
            #     train_hists.append(hist.history['loss'][0])
            #     validation_hists.append(hist.history['val_loss'][0])
            # # print (hists, 'hists')
            # train_histss.append(train_hists)
            # validation_histss.append(validation_hists)
        # print (train_histss, 'train_histss')
        # print (validation_histss, 'validation_histss')
        # print (data_x.shape, data_y.shape)
        train_x = np.array(train_x, dtype='float32')
        train_y = np.array(train_y, dtype='float32')
        test_x = np.array(test_x, dtype='float32')
        test_y = np.array(test_y, dtype='float32')
        hf = h5py.File(data_path)
        hf.create_dataset('train_x', data=train_x)
        hf.create_dataset('train_y', data=train_y)
        hf.create_dataset('test_x', data=test_x)
        hf.create_dataset('test_y', data=test_y)
        hf.close()
        print('dump train test data to', data_path)
    else:
        with h5py.File(data_path, 'r') as hf:
            train_x = np.array(hf.get('train_x'))
            train_y = np.array(hf.get('train_y'))
            test_x = np.array(hf.get('test_x'))
            test_y = np.array(hf.get('test_y'))
        print('loaded train test data from ', data_path)
    print(train_x.shape, train_y.shape)
    print(test_x.shape, test_y.shape)
    """

    # get train and test data from the pre-built datasets
    dataset_image_path = 'test_file/dataset_imageset.hdf5'
    dataset_type_path = 'test_file/dataset_type.hdf5'
    hf_image = h5py.File(dataset_image_path)
    hf_type = h5py.File(dataset_type_path)
    train_x = []
    train_y = []
    test_x = []
    test_y = []
    vgg_fc2_mean = config.vgg_fc2_mean
    vgg_fc2_std = config.vgg_fc2_std
    """
    dataset_imageset statistics:
        0.423964  mean of the data
        0.569374  std of the data
        0.0       min
        4.71836   max
    """
    # train_folder = train_folder[:2]
    # test_folder = test_folder[:2]
    for key in train_folder:
        print(key)
        dataset_image = np.array(hf_image.get(key))
        # normalize the fc2 features (reuses the normalize_intensity helper)
        dataset_image = prepare_dataset.normalize_intensity(dataset_image, vgg_fc2_mean, vgg_fc2_std)
        dataset_type = np.array(hf_type.get(key))
        if len(dataset_image) > look_back:
            data_x, data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type, look_back=look_back)
            train_x += data_x
            train_y += data_y
    for key in test_folder:
        print(key)
        dataset_image = np.array(hf_image.get(key))
        dataset_image = prepare_dataset.normalize_intensity(dataset_image, vgg_fc2_mean, vgg_fc2_std)
        dataset_type = np.array(hf_type.get(key))
        if len(dataset_image) > look_back:
            data_x, data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type, look_back=look_back)
            test_x += data_x
            test_y += data_y
    hf_type.close()
    hf_image.close()

    # train = train_x + test_x
    train_x = np.array(train_x, dtype='float32')
    train_y = np.array(train_y, dtype='float32')
    test_x = np.array(test_x, dtype='float32')
    test_y = np.array(test_y, dtype='float32')
    print(train_x.shape, train_y.shape)
    print(test_x.shape, test_y.shape)
    # nb_classes = max(len(set(train_y)), len(set(test_y)))
    # print set(train_y)
    # print set(test_y)
    # print nb_classes, 'nb_classes'

    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())

    y_train = np_utils.to_categorical(train_y, None)
    y_test = np_utils.to_categorical(test_y, None)
    print(y_train.shape)

    train_loss_hists = []
    validation_loss_hists = []
    train_acc_hists = []
    validation_acc_hists = []
    val_acc = sys.float_info.min
    for i in range(1000):
        print(i, 'epoch')
        # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(i) + '_whole_equal.hdf5'
        # print('start train')
        hist = model.fit(train_x, y_train, nb_epoch=1, batch_size=batch_size, verbose=2,
                         validation_split=0.1, shuffle=False)
        print(hist.history)
        model.reset_states()
        train_loss_hists.append(hist.history['loss'][0])
        validation_loss_hists.append(hist.history['val_loss'][0])
        train_acc_hists.append(hist.history['acc'][0])
        validation_acc_hists.append(hist.history['val_acc'][0])
        if val_acc < hist.history['val_acc'][0]:
            model.save_weights(ModelCheckpoint_file)
            print(i, val_acc, '->', hist.history['val_acc'][0], 'save_weights', ModelCheckpoint_file)
            val_acc = hist.history['val_acc'][0]
        # print (train_hists, 'train_hists')
        # print (validation_hists, 'validation_hists')
        with open(hist_path, 'w') as f:
            json.dump({'train_loss': train_loss_hists, 'val_loss': validation_loss_hists,
                       'train_acc': train_acc_hists, 'val_acc': validation_acc_hists}, f)
        # hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False)
        # break
    # with open(hist_path, 'w') as j:
    #     json.dump(hist.history, j)

    # validation_hists_least_index = validation_hists.index(min(validation_hists))
    # print('ModelCheckpoint_file', 'test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')
    # model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')

    print('load_weights', ModelCheckpoint_file)
    model.load_weights(ModelCheckpoint_file)
    trainPredict = model.predict(train_x, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(test_x, batch_size=batch_size)

    # convert one-hot outputs back to class labels and compute accuracy
    train_predictions = np.argmax(trainPredict, 1)
    train_labels = np.argmax(y_train, 1)
    test_predictions = np.argmax(testPredict, 1)
    test_labels = np.argmax(y_test, 1)
    print(look_back, 'look_back')
    train_accuracy, train_cm = get_accuracy(train_predictions, train_labels, True)
    test_accuracy, test_cm = get_accuracy(test_predictions, test_labels, True)
    print(train_accuracy, 'train accuracy')
    print(train_cm, 'train_cm')
    print(test_accuracy, 'test accuracy')
    print(test_cm, 'test_cm')

    train_cm = train_cm.tolist()
    train_confusion_matrix_path = 'test_file/confusion_matrix_train_extend_normalize_' + str(look_back) + '.json'
    with open(train_confusion_matrix_path, 'w') as f:
        json.dump(train_cm, f)
    test_cm = test_cm.tolist()
    test_confusion_matrix_path = 'test_file/confusion_matrix_test_extend_normalize_' + str(look_back) + '.json'
    with open(test_confusion_matrix_path, 'w') as f:
        json.dump(test_cm, f)

    t2 = time.time()
    print("using %s seconds" % (t2 - t1))
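
# get_accuracy() is called in main() above but is not defined in this file.
# The helper below is only a hedged sketch of what it appears to compute
# (overall accuracy plus a confusion matrix); the name get_accuracy_sketch and
# the use of scikit-learn are assumptions, not the project's actual helper.
from sklearn.metrics import accuracy_score, confusion_matrix


def get_accuracy_sketch(predictions, labels, verbose=False):
    """Hypothetical stand-in for get_accuracy(predictions, labels, True)."""
    accuracy = accuracy_score(labels, predictions)   # fraction of matching class ids
    cm = confusion_matrix(labels, predictions)       # rows: true class, columns: predicted class
    if verbose:
        print(accuracy, 'accuracy')
    return accuracy, cm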
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_lookback_24.hdf5'  # config.ModelCheckpoint_file
    print('ModelCheckpoint_file', ModelCheckpoint_file)
    look_back = 1
    batch_size = 1
    print(look_back, 'look_back')
    file_list = []
    model = lstm_model_1(batch_size, look_back)
    # model.load_weights(ModelCheckpoint_file)

    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    # file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    #     if len(file) <= 2:
    #         print (file)
    #         print (file_list.index(file))
    file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    # validation_file_list = file_list[int(0.85 * len(file_list)):int(0.9 * len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    print(len(train_file_list))
    # print (len(validation_file_list))
    print(len(test_file_list))

    testX = []
    testY = []
    # dataset_count = 0
    train_histss = []
    validation_histss = []
    train_file_list_copy = train_file_list
    # trainXS = np.array([]).reshape(0, look_back)
    # print (trainXS.shape, 'trainxs shape')
    # trainYS = np.array([]).reshape(0, 1)
    trainXS = []
    trainYS = []
    for i in np.arange(0, len(train_file_list_copy), 12):  # len(train_file_list_copy)
        trainX = []
        trainY = []
        train_hists = []
        validation_hists = []
        print(i, 'i')
        train_file_list = train_file_list_copy[i:i + 12]
        # print len(train_file_list)
        for file in train_file_list:
            # print file
            # try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
            trainX += trainXx
            trainY += trainYy
            # print (trainX, 'trainX')
            # print (trainY, 'trainY')
            # break
            # dataset_count += data.shape[0]
            # except:
            #     print(file, 'error')
        trainX = np.array(trainX, dtype='float32')
        trainY = np.array(trainY, dtype='float32')
        # print (trainX.shape)
        # print (trainY.shape, 'trainY SHAPE')
        trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
        # trainXS = np.vstack((trainXS, trainX))
        # trainYS = np.vstack((trainYS, trainY))
        # print (trainXS.shape, 'trainxs shape')
        # break
        # return
        trainXS.append(trainX)
        trainYS.append(trainY)

        """ training """
        for epoch in range(100):
            hist = model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2,
                             validation_split=0.1, shuffle=False)
            model.reset_states()
            train_hists.append(hist.history['loss'][0])
            validation_hists.append(hist.history['val_loss'][0])
            # print (hists, 'hists')
        train_histss.append(train_hists)
        validation_histss.append(validation_hists)
    print(train_histss, 'train_histss')
    print(validation_histss, 'validation_histss')
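
# prepare_dataset.create_dataset() is used above but defined elsewhere. The
# sketch below shows the sliding-window construction it appears to perform on a
# 1-D intensity series (each sample is `look_back` consecutive values, the
# target is the value that follows); create_dataset_sketch is a hypothetical
# name and the real implementation may differ.
def create_dataset_sketch(series, look_back):
    data_x, data_y = [], []
    for start in range(len(series) - look_back):
        data_x.append(list(series[start:start + look_back]))  # input window of look_back steps
        data_y.append(series[start + look_back])               # next-step target
    return data_x, data_y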
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    train_predict_image = config.train_predict_image
    test_predict_image = config.test_predict_image
    look_back = 3
    file_list = []
    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    #     if len(file) <= 2:
    #         print (file)
    #         print (file_list.index(file))
    # file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    # print (train_file_list)

    trainX = []
    trainY = []
    testX = []
    testY = []
    dataset_count = 0
    for file in train_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
            trainX += trainXx
            trainY += trainYy
            dataset_count += data.shape[0]
        except:
            print(file)
    for file in test_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std)
            # data = list(data)
            testXx, testYy = prepare_dataset.create_dataset(data, look_back)
            testX += testXx
            testY += testYy
            dataset_count += data.shape[0]
        except:
            print(file)
    trainX = np.array(trainX, dtype='float32')
    trainY = np.array(trainY, dtype='float32')
    testX = np.array(testX, dtype='float32')
    testY = np.array(testY, dtype='float32')
    print(trainX.shape)
    print(testX.shape)
    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))

    batch_size = 1
    model = Sequential()
    model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
    model.add(Dense(3))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # checkpointer = ModelCheckpoint(filepath=ModelCheckpoint_file, verbose=2, save_best_only=True)

    hists = []
    for i in range(10):
        hist = model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False)
        model.reset_states()
        hists.append(hist.history['loss'][0])
    print(hists, 'hists')
    # model.save_weights(ModelCheckpoint_file)

    # make predictions
    trainPredict = model.predict(trainX, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(testX, batch_size=batch_size)

    # invert predictions
    trainPredict = prepare_dataset.reverse_normalize_intensity(trainPredict, intensity_mean, intensity_std)
    trainY = prepare_dataset.reverse_normalize_intensity(trainY, intensity_mean, intensity_std)
    testPredict = prepare_dataset.reverse_normalize_intensity(testPredict, intensity_mean, intensity_std)
    testY = prepare_dataset.reverse_normalize_intensity(testY, intensity_mean, intensity_std)

    # calculate root mean squared error
    # print (trainPredict[:, 0], 'trainPredict')
    # print (trainPredict.shape, 'len_train_predict')
    # print (trainY[0], 'trainY')
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))

    dataset = np.zeros((dataset_count, 1), dtype='float32')
    # trainPredictPlot = np.empty_like(dataset)
    # trainPredictPlot[:, :] = np.nan
    # trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict
    # # shift test predictions for plotting
    # testPredictPlot = np.empty_like(dataset)
    # testPredictPlot[:, :] = np.nan
    # testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1, :] = testPredict
    # plt.plot(dataset)

    fig = plt.figure()
    plt.title('train_predicts_look_back')
    plt.plot(list(trainPredict[:, 0]), 'r--', label='train_predict')
    plt.plot(list(trainY), 'g--', label='train')
    plt.legend(loc='upper left', shadow=True)
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.savefig(train_predict_image)
    plt.close(fig)

    fig = plt.figure()
    plt.title('test_predicts_look_back')
    plt.plot(list(testPredict[:, 0]), 'r--', label='test_predict')
    plt.plot(list(testY), 'g--', label='test')
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.legend(loc='upper left', shadow=True)
    plt.savefig(test_predict_image)
    plt.close(fig)
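
# prepare_dataset.normalize_intensity() / reverse_normalize_intensity() are not
# defined in this file. Given that they are always paired with
# config.intensity_mean and config.intensity_std, they are most likely a plain
# z-score transform and its inverse; the two helpers below are hedged sketches
# under that assumption, not the project's actual code.
def normalize_intensity_sketch(values, mean, std):
    return (np.asarray(values, dtype='float32') - mean) / std   # z-score normalization


def reverse_normalize_intensity_sketch(values, mean, std):
    return np.asarray(values, dtype='float32') * std + mean     # undo the z-score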
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    mean_v, std_v = config.mean_v, config.std_v

    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())

    # train_x = np.random.uniform(0, 1, (17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0, 1, (17, 1))
    # print (train_x)
    # train_x = np.array(train_x, dtype='float32')
    # train_y = np.array(train_y, dtype='float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False)

    """ count the number of images in each typhoon sequence """
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)

    """ check whether the number of images equals the number of track records """
    # for subdir in subdir_list:
    #     for subdirs, dirs, files in os.walk(subdir):
    #         for file in files:
    #             # print (file)
    #             [k1, k2] = file.split("-")[:2]
    #             key = "".join((k1, k2))
    #             try:
    #                 mark = track_dict[key]
    #             except KeyError:
    #                 print (file + ' does not have a track value')
    # for k in track_dict.keys():
    #     k2 = k[-6:]  # typhoon number
    #     k1 = k[:-6]
    #     file = k1 + '-' + k2 + '*'
    #     file_path = image_path + k2 + '/' + file
    #     if not os.path.isfile(file_path):
    #         print (file_path, 'not exists')

    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]
        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key, count, image_number_dictionary[key], 'not equal')
                if count == image_number_dictionary[key]:
                    # print (key, count, image_number_dictionary[key], 'equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list, 'not_equal_track_image_list')
    # print (equal_track_image_list, 'equal_track_image_list')
    print(len(equal_track_image_list), 'length of equal track image list')

    # check whether consecutive track records are one hour apart;
    # this holds for both the equal and the not_equal lists
    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif, 'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break

    dataset_imageset_path = 'test_file/dataset_image_unequal.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity_unequal.hdf5'
    hf_image = h5py.File(dataset_imageset_path)
    hf_intensity = h5py.File(dataset_intensity_path)
    vgg_model = VGG_16('vgg16_weights.h5')
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')

    for key in not_equal_track_image_list:
        # for key in equal_track_image_list:
        image_folder = image_path + key + '/'
        # dataset_x, dataset_y = prepare_dataset.dataset_1_2(image_folder, track_dict)
        # print dataset_x.shape
        # print dataset_y.shape
        # break
        file_path_list = []
        # print key
        dataset_image = []
        dataset_intensity = []
        for subdirs, dirs, files in os.walk(image_folder):
            for file in files:
                file_path = os.path.join(subdirs, file)
                file_path_list.append(file_path)
        sorted_file_list = sorted(file_path_list,
                                  key=lambda x: int(x.split('/')[-1].split('-')[-4]))
        # print (len(sorted_file_list), 'len of sorted_file_list')
        ts = []
        intensities = []
        for file_path in sorted_file_list:
            yymmddhh = file_path.split('/')[-1].split('-')[-4]
            track_key = yymmddhh + key
            intensities.append(float(track_dict[track_key][-2]))
            t = datetime.datetime.strptime(yymmddhh, '%Y%m%d%H')
            ts.append(t)
        # print len(ts), 'len ts'
        tmp = ts[0]
        orig_image = load.get_x(sorted_file_list, img_rows, img_cols, mean_v, std_v)
        tmp_image = orig_image[0]
        # dataset_input = get_fc2(vgg_model, dataset_image)
        # dataset_input = np.array(dataset_input)
        dataset_image.append(orig_image[0])
        dataset_intensity.append(intensities[0])
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif, 'dif')
            if dif != 3600:
                print(dif / 3600.0, i, key, ts[i])
                # fill the gap with linearly interpolated frames, one per missing hour
                for j in range(1, int(dif / 3600.0)):
                    t_fill = tmp + datetime.timedelta(seconds=3600 * j)  # timestamp of the j-th missing hour
                    yy = t_fill.year
                    mm = str(t_fill.month).zfill(2)
                    dd = str(t_fill.day).zfill(2)
                    hh = str(t_fill.hour).zfill(2)
                    yymmddhh = str(yy) + mm + dd + hh
                    track_key = yymmddhh + key
                    intensity = float(track_dict[track_key][-2])
                    image = (1 - (float(j) / (dif / 3600.0))) * tmp_image + (float(j) / (dif / 3600.0)) * orig_image[i]
                    dataset_image.append(image)
                    dataset_intensity.append(intensity)
            dataset_image.append(orig_image[i])
            dataset_intensity.append(intensities[i])
            tmp = ts[i]
            tmp_image = orig_image[i]
        dataset_image = np.array(dataset_image)
        dataset_input = get_fc2(vgg_model, dataset_image)
        dataset_intensity = np.array(dataset_intensity)
        dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity, intensity_mean, intensity_std)
        hf_image.create_dataset(key, data=dataset_input)
        hf_intensity.create_dataset(key, data=dataset_intensity)

    hf_image.close()
    hf_intensity.close()
    t2 = time.time()
    print("using %s seconds" % (t2 - t1))
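
# get_fc2() is used above but defined elsewhere; it presumably pushes the
# preprocessed images through VGG-16 and returns the activations of the second
# fully connected (fc2) layer. The sketch below uses the Keras 1.x backend-
# function pattern; the layer index -3 assumes the classic Sequential VGG_16
# (fc2 Dense followed by Dropout and the softmax Dense) and may need adjusting.
from keras import backend as K


def get_fc2_sketch(vgg_model, images, batch_size=32):
    # learning phase 0 = test mode, so dropout is disabled and features are deterministic
    fc2_fn = K.function([vgg_model.input, K.learning_phase()],
                        [vgg_model.layers[-3].output])
    features = []
    for start in range(0, len(images), batch_size):
        features.append(fc2_fn([images[start:start + batch_size], 0])[0])
    return np.concatenate(features, axis=0)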