def main():
	np.random.seed(7)
	t1 = time.time()
	image_path = config.image_path
	track_path = config.track_path
	track_dic_path = config.track_dic_path
	track_dict = load.load_json(track_dic_path)
	intensity_mean,intensity_std = config.intensity_mean, config.intensity_std
	batch_size = config.batch_size
	ModelCheckpoint_file = config.ModelCheckpoint_file
	look_back = config.look_back
	img_rows,img_cols = config.img_rows,config.img_cols
	subdir_list = []
	hist_path = config.hist_path

	# train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
	# train_y = np.random.uniform(0,1,(17,1))
	# print (train_x)
	# train_x = np.array(train_x,dtype = 'float32')
	# train_y = np.array(train_y,dtype= 'float32')
	# hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)

	"""
	count the number of images in each typhoon sequence
	"""
	image_number_dictionary={}
	for  subdirs, dirs, files in os.walk(image_path):
		# print (subdirs)
		subdir_list.append(subdirs)
	for subdir in subdir_list:
		count = 0
		for subdirs, dirs, files in os.walk(subdir):
			for file in files:
				count += 1
		key = subdir.split('/')[-1]
		image_number_dictionary[key] = count
		if count < 24:
			print (key,count)
	# print (image_number_dictionary)

	"""
	check whether the number of images equals the number of track records
	"""
	# for subdir in subdir_list:
	# 	for subdirs, dirs, files in os.walk(subdir):
	# 		for file in files:
	# 			# print (file)
	# 			[k1, k2] = file.split("-")[:2]
	# 			key = "".join((k1,k2))
	# 			try:
	# 				mark = track_dict[key]
	# 			except KeyError:
	# 				print (file + ' does not have track value')
	

# for k in track_dict.keys():
# 	k2 = k[-6:] # typhoon number
# 	k1 = k[:-6]
# 	file = k1 +'-' + k2 +'*'
# 	file_path = image_path + k2 +'/' + file
# 	if not os.path.isfile(file_path):
# 		print (file_path + ' does not exist')
	track_dict_number ={}
	equal_track_image_list = []
	not_equal_track_image_list = []
	for subdir in subdir_list:
		key =subdir.split('/')[-1] 

		if len(key) > 0 and key not in ['201620','201621','201622']:
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				count = 0
				for row in tsv_reader:
					count += 1
				track_dict_number[key] = count
				if count != image_number_dictionary[key]:
					not_equal_track_image_list.append(key)
					# print (key,count,image_number_dictionary[key],'not equal')
				if count == image_number_dictionary[key]:
					# print  (key,count,image_number_dictionary[key],' equal')
					equal_track_image_list.append(key)
	# print (not_equal_track_image_list,'not_equal_track_image_list')
	# print (equal_track_image_list,'equal_track_image_list')
		
	print (len(equal_track_image_list), 'length of equal track image list')
	# "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list "

	for key in not_equal_track_image_list:
			ts =[]
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				for row in tsv_reader:
					yy = row[0]
					mm = row[1]
					dd = row[2]
					hh = row[3]
					t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H')
					ts.append(t)
			tmp = ts[0]
			for i in range(1,len(ts)):
				dif = (ts[i] - tmp).total_seconds()
				# print (dif,'dif')
				if dif != 3600:
					print (dif,i,key)
				tmp = ts[i]
			# break
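	# Split the sequences whose image and track counts match 90/10 into train and
	# test folders; the shuffled split is dumped to data_folder_path once so that
	# later runs reload the same split instead of reshuffling.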
	data_folder_path = config.data_folder_path
	if not os.path.exists(data_folder_path): 
		equal_track_image_list = np.array(equal_track_image_list)
		np.random.shuffle(equal_track_image_list)
		equal_track_image_list = list(equal_track_image_list)
		# equal_track_image_list = equal_track_image_list[:2]
		train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))]
		test_folder = equal_track_image_list[int(0.9* len(equal_track_image_list)):]
		with open(data_folder_path,'w') as f:
			json.dump({'train_folder':train_folder,'test_folder': test_folder},f)
			print ('data_folder_path dumped to: ',data_folder_path)
	else:
		with open(data_folder_path,'r') as f:
			data_folder = json.load(f)
			train_folder = data_folder['train_folder']
			test_folder = data_folder['test_folder']
			print ('load data folder from: ' , data_folder_path)




	"""
	data_path = config.data_path
	
	if not os.path.exists(data_path):
		train_x =[]
		train_y=[]
		test_x = []
		test_y = []
		vgg_model = VGG_16('vgg16_weights.h5')
		sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	   	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
		for key in test_folder:
			print(key)
			image_folder = image_path + key +'/'
			track_file_path = track_path + key + '.itk'
			dataset_image = prepare_dataset.dataset_2(image_folder)
			print (dataset_image.shape)
			dataset_input = get_fc2(vgg_model,dataset_image)
			dataset_intensity = prepare_dataset.dataset_1(track_file_path)
			dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
			print (dataset_image.shape,'dataset_image.shape')
			print (dataset_intensity.shape,'dataset_intensity')
			data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back)
			test_x += data_x
			test_y += data_y
		# print test_y.shape,test_y
		# train_histss =[]
		# validation_histss=[]
		for key in train_folder:
			print(key)
			image_folder = image_path + key +'/'
			track_file_path = track_path + key + '.itk'
			dataset_image = prepare_dataset.dataset_2(image_folder)
			dataset_input = get_fc2(vgg_model,dataset_image)
			dataset_intensity = prepare_dataset.dataset_1(track_file_path)
			dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
			print (dataset_image.shape,'dataset_image.shape')
			print (dataset_intensity.shape,'dataset_intensity')
			data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back)
			# print (len(data_x))
			train_x += data_x
			train_y += data_y
			data_x = np.array(data_x)
			data_y = np.array(data_y)
			# print (data_x.shape,data_y.shape,'data_x,data_y')
			# train_hists=[]
			# validation_hists=[]
			# for i in range(20):
			# 	print('start train')
			# 	hist = model.fit(data_x, data_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
			# 	model.reset_states()
			# 	train_hists.append(hist.history['loss'][0])
			# 	validation_hists.append(hist.history['val_loss'][0])
			# # print (hists,'hists')
			# train_histss.append(train_hists)
			# validation_histss.append(validation_hists)

		# print (train_histss,'train_histss')
		# print (validation_histss, 'validation_histss')
		
			# print ((data_x.shape),data_y.shape)
		train_x = np.array(train_x,dtype = 'float32')
		train_y = np.array(train_y,dtype = 'float32')
		test_x = np.array(test_x,dtype = 'float32')
		test_y = np.array(test_y,dtype = 'float32')
		
		hf = h5py.File(data_path)
		hf.create_dataset('train_x',data = train_x)
		hf.create_dataset('train_y',data = train_y)
		hf.create_dataset('test_x', data= test_x)
		hf.create_dataset('test_y', data= test_y)
		hf.close()
		print ('dump train test data to' ,data_path)

	else:
		with h5py.File(data_path,'r') as hf:
			train_x = np.array(hf.get('train_x'))
			train_y = np.array(hf.get('train_y'))
			test_x = np.array(hf.get('test_x'))
			test_y = np.array(hf.get('test_y'))
		print ('loaded train test data from ', data_path)
	print (train_x.shape,train_y.shape)
	print (test_x.shape,test_y.shape)
	"""

	# get train test data from pre_built dataset
	dataset_image_path = 'test_file/dataset_imageset.hdf5'
	dataset_type_path = 'test_file/dataset_type.hdf5'
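	# The two HDF5 files below are assumed to hold, keyed by typhoon sequence,
	# the pre-computed VGG fc2 feature vectors (dataset_imageset.hdf5) and the
	# per-image storm-type labels (dataset_type.hdf5), produced by an earlier
	# preprocessing step rather than by this script.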

	hf_image = h5py.File(dataset_image_path, 'r')

	hf_type = h5py.File(dataset_type_path, 'r')
	train_x = []
	train_y = []
	test_x = []
	test_y = []

	vgg_fc2_mean = config.vgg_fc2_mean
	vgg_fc2_std = config.vgg_fc2_std
	"""
	dataset_imageset
	0.423964 mean data
	0.569374 std data
	0.0 min
	4.71836 max
	"""
	# train_folder =train_folder[:2]
	# test_folder = test_folder[:2]
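	# prepare_dataset.extend_dataset_2 is assumed to slide a window of look_back
	# consecutive fc2 vectors over each sequence and pair it with the storm type
	# at the following step, returning plain lists so windows from different
	# typhoons can be concatenated with += ; the actual definition lives in
	# prepare_dataset.py.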
	for key in train_folder:
		print(key)
		dataset_image = np.array(hf_image.get(key))
		dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std)  # normalize the fc2 features (reuses the intensity normalization helper)
		dataset_type = np.array(hf_type.get(key))

		if len(dataset_image) > look_back:
			data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back)
			train_x += data_x
			train_y += data_y

	for key in test_folder:
		print (key)
		dataset_image = np.array(hf_image.get(key))
		dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std)
		dataset_type = np.array(hf_type.get(key))
		if len(dataset_image) > look_back:
			data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back)
			test_x += data_x
			test_y += data_y
	hf_type.close()
	hf_image.close()
	# train = train_x + test_x
	train_x = np.array(train_x,dtype = 'float32')
	train_y = np.array(train_y,dtype = 'float32')
	test_x = np.array(test_x,dtype = 'float32')
	test_y = np.array(test_y,dtype = 'float32')
	print (train_x.shape,train_y.shape)
	print (test_x.shape,test_y.shape)
	# nb_classes = max(len(set(train_y)), len(set(test_y)))
	# print set(train_y)
	# print set(test_y)
	# print nb_classes,'nb_classes'
	model = pretrain_model(look_back,batch_size)
	if os.path.exists(ModelCheckpoint_file):
		print ('loading weights from', ModelCheckpoint_file)
		model.load_weights(ModelCheckpoint_file)
	print(model.summary())
	y_train = np_utils.to_categorical(train_y, None)
	y_test = np_utils.to_categorical(test_y, None)
	print (y_train.shape)

	train_loss_hists=[]
	validation_loss_hists=[]
	train_acc_hists=[]
	validation_acc_hists=[]
	val_acc =  sys.float_info.min
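	# Manual training loop: fit one epoch at a time with shuffle=False to keep
	# samples in temporal order, call reset_states() to clear the recurrent
	# state between epochs, and save the weights whenever validation accuracy
	# improves on the best value seen so far.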

	for i in range(1000):
		print (i,'epoch')
		# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(i)+'_whole_equal.hdf5'
		# print('start train')
		hist = model.fit(train_x, y_train, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
		print (hist.history)
		model.reset_states()
		train_loss_hists.append(hist.history['loss'][0])
		validation_loss_hists.append(hist.history['val_loss'][0])
		train_acc_hists.append(hist.history['acc'][0])
		validation_acc_hists.append(hist.history['val_acc'][0])
		if val_acc < hist.history['val_acc'][0]:
			model.save_weights(ModelCheckpoint_file)
			print(i,val_acc,'->',hist.history['val_acc'][0],'save_weights',ModelCheckpoint_file)
			val_acc = hist.history['val_acc'][0]
	# print (train_hists,'train_hists')
	# print (validation_hists, 'validation_hists')
	with open(hist_path,'w') as f:
		json.dump({'train_loss':train_loss_hists,'val_loss':validation_loss_hists,'train_acc':train_acc_hists,'val_acc':validation_acc_hists},f)
	# hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split = 0.1,shuffle=False)
		# break
	# with open(hist_path,'w') as j:
	# 	json.dump(hist.history,j)
	# validation_hists_least_index = validation_hists.index(min(validation_hists))
	# print ('ModelCheckpoint_file','test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')
	# model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')
	
	print('load_weights',ModelCheckpoint_file)
	model.load_weights(ModelCheckpoint_file)
	trainPredict = model.predict(train_x, batch_size=batch_size)
	
	model.reset_states()

	
	testPredict = model.predict(test_x, batch_size=batch_size)
	# # invert predictions


	# # calculate root mean squared error
	
	train_predictions = np.argmax(trainPredict, 1)
	train_labels = np.argmax(y_train, 1)
	test_predictions = np.argmax(testPredict, 1)
	test_labels = np.argmax(y_test, 1)
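	# argmax converts the predicted class scores and the one-hot targets back to
	# integer class indices, which get_accuracy uses to compute the accuracy and
	# confusion matrix reported below.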


	print(look_back,'look_back')
	train_accuracy, train_cm = get_accuracy(train_predictions, train_labels, True)
	test_accuracy, test_cm = get_accuracy(test_predictions, test_labels, True)

	print (train_accuracy,'train accuracy')
	print(train_cm,'train_cm')
	print (test_accuracy,'test accuracy')
	print(test_cm,'test_cm')

	train_cm = train_cm.tolist()
	train_confusion_matrix_path = 'test_file/confusion_matrix_train_extend_normalize_'+ str( look_back) +'.json'
	with open(train_confusion_matrix_path, 'w') as f:
		json.dump(train_cm,f)

	test_cm = test_cm.tolist()
	test_confusion_matrix_path = 'test_file/confusion_matrix_test_extend_normalize_'+ str(look_back) +'.json'
	with open(test_confusion_matrix_path, 'w') as f:
		json.dump(test_cm,f)

	t2 = time.time()
	print ("using  %s seconds" % (t2-t1))
Example #2
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_lookback_24.hdf5'
    print('ModelCheckpoint_file',
          ModelCheckpoint_file)  #config.ModelCheckpoint_file
    look_back = 1
    batch_size = 1
    print(look_back, 'look_back')
    file_list = []
    model = lstm_model_1(batch_size, look_back)
    # model.load_weights(ModelCheckpoint_file)
    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    # file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    # 	if len(file) <=2:
    # 		print (file)
    # 		print (file_list.index(file))
    file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    # validation_file_list = file_list[int(0.85*len(file_list)):int(0.9*len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    print(len(train_file_list))
    # print (len(validation_file_list))
    print(len(test_file_list))

    testX = []
    testY = []
    # dataset_count = 0
    train_histss = []
    validation_histss = []
    train_file_list_copy = train_file_list
    # trainXS=np.array([]).reshape(0,look_back)
    # print (trainXS.shape,'trainxs shape')
    # trainYS = np.array([]).reshape(0,1)
    trainXS = []
    trainYS = []
    for i in np.arange(0, len(train_file_list_copy),
                       12):  #len(train_file_list_copy)
        trainX = []
        trainY = []
        train_hists = []
        validation_hists = []
        print(i, 'i')
        train_file_list = train_file_list_copy[i:i + 12]
        # print len(train_file_list)
        for file in train_file_list:
            # print file
            # try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
            trainX += trainXx
            trainY += trainYy
            # print (trainX,'trainX')
            # print (trainY,'trainY')
            # break
            # dataset_count += data.shape[0]
            # except:
            # 	print(file,'error')
        trainX = np.array(trainX, dtype='float32')
        trainY = np.array(trainY, dtype='float32')
        # print (trainX.shape)
        # print(trainY.shape,'trainY SHAPE')
        trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
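        # prepare_dataset.create_dataset is assumed to build look_back-length
        # sliding windows over the normalized intensity series (X) with the next
        # value as the target (y); the reshape above adds a single-feature axis
        # so X matches the (batch, timesteps, features) layout Keras LSTMs expect.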
        # trainXS = np.vstack((trainXS, trainX))
        # trainYS = np.vstack((trainYS, trainY))
        # print (trainXS.shape,'trainxs shape')
        # break
        # return

        trainXS.append(trainX)
        trainYS.append(trainY)
        """
		training
		"""

        for i in range(100):
            hist = model.fit(trainX,
                             trainY,
                             nb_epoch=1,
                             batch_size=batch_size,
                             verbose=2,
                             validation_split=0.1,
                             shuffle=False)
            model.reset_states()
            train_hists.append(hist.history['loss'][0])
            validation_hists.append(hist.history['val_loss'][0])
        # print (hists,'hists')
        train_histss.append(train_hists)
        validation_histss.append(validation_hists)
    print(train_histss, 'train_histss')
    print(validation_histss, 'validation_histss')
    """
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    train_predict_image = config.train_predict_image
    test_predict_image = config.test_predict_image
    look_back = 3
    file_list = []
    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    # 	if len(file) <=2:
    # 		print (file)
    # 		print (file_list.index(file))
    # file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    # print(train_file_list)
    trainX = []
    trainY = []
    testX = []
    testY = []
    dataset_count = 0
    for file in train_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
            trainX += trainXx
            trainY += trainYy
            dataset_count += data.shape[0]
        except:
            print(file)

    for file in test_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            testXx, testYy = prepare_dataset.create_dataset(data, look_back)
            testX += testXx
            testY += testYy
            dataset_count += data.shape[0]
        except:
            print(file)

    trainX = np.array(trainX, dtype='float32')
    trainY = np.array(trainY, dtype='float32')
    testX = np.array(testX, dtype='float32')
    testY = np.array(testY, dtype='float32')

    print(trainX.shape)
    print(testX.shape)

    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
    batch_size = 1
    model = Sequential()
    model.add(
        LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
    model.add(Dense(3))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # checkpointer = ModelCheckpoint(filepath=ModelCheckpoint_file, verbose=2, save_best_only=True)
    hists = []
    for i in range(10):
        hist = model.fit(trainX,
                         trainY,
                         nb_epoch=1,
                         batch_size=batch_size,
                         verbose=2,
                         shuffle=False)
        model.reset_states()
        hists.append(hist.history['loss'][0])
    print(hists, 'hists')
    # model.save_weights(ModelCheckpoint_file)
    # make predictions

    trainPredict = model.predict(trainX, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(testX, batch_size=batch_size)
    # invert predictions
    trainPredict = prepare_dataset.reverse_normalize_intensity(
        trainPredict, intensity_mean, intensity_std)
    trainY = prepare_dataset.reverse_normalize_intensity(
        trainY, intensity_mean, intensity_std)
    testPredict = prepare_dataset.reverse_normalize_intensity(
        testPredict, intensity_mean, intensity_std)
    testY = prepare_dataset.reverse_normalize_intensity(
        testY, intensity_mean, intensity_std)
    # calculate root mean squared error
    # print (trainPredict[:,0], 'trainPredict')
    # print (trainPredict.shape,'len_train_predict')
    # print(trainY[0],'trainY')
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    dataset = np.zeros((dataset_count, 1), dtype='float32')

    # trainPredictPlot = np.empty_like(dataset)
    # trainPredictPlot[:, :] = np.nan
    # trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # # shift test predictions for plotting
    # testPredictPlot = np.empty_like(dataset)
    # testPredictPlot[:, :] = np.nan
    # testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # # plt.plot(dataset))
    fig = plt.figure()
    plt.title('train_predicts_look_back')
    plt.plot(list(trainPredict[:, 0]), 'r--', label='train_predict')
    plt.plot(list(trainY), 'g--', label='train')
    plt.legend(loc='upper left', shadow=True)
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.savefig(train_predict_image)
    plt.close(fig)
    fig = plt.figure()
    plt.title('test_predicts_look_back')
    plt.plot(list(testPredict[:, 0]), 'r--', label='test_predict')
    plt.plot(list(testY), 'g--', label='test')
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.legend(loc='upper left', shadow=True)
    plt.savefig(test_predict_image)
    plt.close(fig)
    """
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    mean_v, std_v = config.mean_v, config.std_v
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('loading weights from', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of images in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check whether the number of images equals the number of track records
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file + ' does not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path + ' does not exist')
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')

    print(len(equal_track_image_list), 'length of equal track image list')
    # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list "

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    dataset_imageset_path = 'test_file/dataset_image_unequal.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity_unequal.hdf5'
    hf_image = h5py.File(dataset_imageset_path, 'a')
    hf_intensity = h5py.File(dataset_intensity_path, 'a')
    vgg_model = VGG_16('vgg16_weights.h5')
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
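    # VGG-16 with pre-trained weights acts purely as a fixed feature extractor;
    # get_fc2 is assumed to return the fc2-layer activations for a batch of
    # images, and those activations are what gets written per sequence below.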
    for key in not_equal_track_image_list:
        # # for key in equal_track_image_list:
        image_folder = image_path + key + '/'
        # 	dataset_x,dataset_y = prepare_dataset.dataset_1_2(image_folder,track_dict)
        # 	print dataset_x.shape
        # 	print dataset_y.shape
        # 	break
        file_path_list = []
        # print key
        dataset_image = []
        dataset_intensity = []
        for subdirs, dirs, files in os.walk(image_folder):
            for file in files:
                file_path = os.path.join(subdirs, file)
                file_path_list.append(file_path)
        sorted_file_list = sorted(
            file_path_list, key=lambda x: int(x.split('/')[-1].split('-')[-4]))
        # print (len(sorted_file_list),'len of sorted_file_list')
        ts = []
        intensities = []
        for file_path in sorted_file_list:
            yymmddhh = file_path.split('/')[-1].split('-')[-4]
            track_key = yymmddhh + key
            intensities.append(float(track_dict[track_key][-2]))
            t = datetime.datetime.strptime(yymmddhh, '%Y%m%d%H')
            ts.append(t)
        # print len(ts),'len ts'
        tmp = ts[0]
        orig_image = load.get_x(sorted_file_list, img_rows, img_cols, mean_v,
                                std_v)
        tmp_image = orig_image[0]
        # 		dataset_input = get_fc2(vgg_model,dataset_image)
        # 		dataset_input = np.array(dataset_input)

        dataset_image.append(orig_image[0])
        dataset_intensity.append(intensities[0])
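        # For every gap longer than one hour, the loop below inserts the missing
        # hourly steps: the intensity is looked up in track_dict at the
        # interpolated timestamp, and the image is a linear blend
        # (1 - j/gap) * previous_image + (j/gap) * next_image of the neighbours.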
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif / 3600.0, i, key, ts[i])
                for j in range(1, int(dif / 3600.0)):
                    t2 = tmp + datetime.timedelta(seconds=3600)
                    yy = t2.year
                    mm = str(t2.month).zfill(2)
                    dd = str(t2.day).zfill(2)
                    hh = str(t2.hour).zfill(2)
                    yymmddhh = str(yy) + mm + dd + hh
                    track_key = yymmddhh + key
                    intensity = float(track_dict[track_key][-2])
                    image = (1 - (float(j) / (dif / 3600.0))) * tmp_image + (
                        float(j) / (dif / 3600.0)) * orig_image[i]
                    dataset_image.append(image)
                    dataset_intensity.append(intensity)
            dataset_image.append(orig_image[i])
            dataset_intensity.append(intensities[i])

            tmp = ts[i]
            tmp_image = orig_image[i]
        dataset_image = np.array(dataset_image)
        dataset_input = get_fc2(vgg_model, dataset_image)
        dataset_intensity = np.array(dataset_intensity)
        dataset_intensity = prepare_dataset.normalize_intensity(
            dataset_intensity, intensity_mean, intensity_std)
        hf_image.create_dataset(key, data=dataset_input)
        hf_intensity.create_dataset(key, data=dataset_intensity)

    hf_image.close()
    hf_intensity.close()

    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))