def find_matching_cloudsat_files(radiance_filename, cloudsat_dir):
    """
    Find the CloudSat granule file(s) matching the acquisition time of a MODIS swath.

    The time of the radiance file is used for selecting the cloudsat files: a
    MODIS swath is acquired every 5 minutes, while a CLOUDSAT granule is
    acquired every ~99 minutes. It can happen that a swath crosses over two
    granules. The filenames specify the starting time of the acquisition.
    CLOUDSAT filenames are in the format: AAAADDDHHMMSS_*.hdf

    :param radiance_filename: the filename for the radiance .hdf, demarcated with "MYD02".
    :param cloudsat_dir: directory handed to find_cloudsat_by_day to list the candidate granules.
    :return cloudsat_filenames: a list of paths to the corresponding cloudsat files (1 or 2 files):
        the latest granule starting at or before the swath (less than 6000 s earlier), followed,
        when there is one, by the earliest granule starting less than 300 s after the swath.
    :raises ValueError: if no granule starts within the 6000 s preceding the swath.
    """
    # A granule lasts ~99 minutes, so the granule containing the swath start
    # must have begun less than 6000 s (100 min) before it.
    max_preceding_seconds = 6000
    # A swath lasts 5 minutes, so a granule starting less than 300 s after the
    # swath start begins while the swath is still being acquired.
    max_following_seconds = 300

    basename = os.path.basename(radiance_filename)
    year, abs_day, hour, minutes = (
        int(field) for field in get_file_time_info(basename))
    swath_dt = get_datetime(year, abs_day, hour, minutes)

    # NOTE(review): only the files returned for the swath's own day are
    # examined; presumably a swath right after midnight has its preceding
    # granule listed under the previous day - confirm against
    # find_cloudsat_by_day.
    cloudsat_filenames = find_cloudsat_by_day(abs_day, year, cloudsat_dir)

    # collect all granules before and after swath's time, keyed by start time
    prev_candidates, foll_candidates = {}, {}
    for filename in cloudsat_filenames:
        # the granule start time is encoded in the first 13 characters
        stamp = os.path.basename(filename)
        granule_dt = get_datetime(
            int(stamp[:4]), int(stamp[4:7]), int(stamp[7:9]),
            int(stamp[9:11]), int(stamp[11:13]))

        # signed offset of the granule start relative to the swath start
        offset = (granule_dt - swath_dt).total_seconds()
        if -max_preceding_seconds < offset <= 0:
            prev_candidates[granule_dt] = filename
        elif 0 < offset < max_following_seconds:
            foll_candidates[granule_dt] = filename

    if not prev_candidates:
        raise ValueError(
            "no cloudsat granule found in the {} seconds preceding {}".format(
                max_preceding_seconds, radiance_filename))

    matches = [prev_candidates[max(prev_candidates)]]

    # if swath crosses over two cloudsat granules, return both
    if foll_candidates:
        matches.append(foll_candidates[min(foll_candidates)])

    return matches
def save_as_nc(swath, layer_info, swath_path, save_name):
    """
    Write a swath and its layer information into a new file at save_name.

    The output is created by copy_dataset_structure from the reference file
    netcdf/cumulo.nc and then populated by fill_dataset.

    :param swath: swath data, forwarded unchanged to fill_dataset.
    :param layer_info: layer information, forwarded unchanged to fill_dataset.
    :param swath_path: path of the source swath; its time information is read with get_file_time_info.
    :param save_name: path of the output file; also inspected for the substrings
        "daylight" / "night" to determine the swath status.
    """
    dataset, variables = copy_dataset_structure(
        os.path.join("netcdf", "cumulo.nc"), save_name)

    # close the output even when filling fails, so the handle is not leaked
    try:
        # determine swath status from directory hierarchy
        if "daylight" in save_name:
            status = "daylight"
        elif "night" in save_name:
            status = "night"
        else:
            status = "corrupt"

        # convert npy to nc
        year, abs_day, hour, minute = get_file_time_info(swath_path)
        minutes_since_2008 = minutes_since(
            int(year), int(abs_day), int(hour), int(minute))

        fill_dataset(dataset, variables, swath, layer_info,
                     minutes_since_2008, status)
    finally:
        dataset.close()
int(minute)) fill_dataset(copy, variables, swath, layer_info, minutes_since_2008, status) copy.close() if __name__ == "__main__": swath_path = sys.argv[2] save_dir = sys.argv[1] swath, layer_info = load_npys(swath_path) # get time info year, abs_day, hour, minute = get_file_time_info(swath_path) month = get_datetime(year, int(abs_day)).month # determine swath status from directory hierarchy status = "corrupt" if "daylight" in swath_path: status = "daylight" elif "night" in swath_path: status = "night" # create save directory if not os.path.exists(save_dir): os.makedirs(save_dir) #create a copy of reference dataset copy_name = "A{}.{}.{}{}.nc".format(year, abs_day, hour, minute)
import sys
from pathlib import Path
from netcdf.npy_to_nc import save_as_nc
from src.utils import get_file_time_info

# command line: <save_dir> <myd02_filename>
myd02_filename = sys.argv[2]
save_dir = sys.argv[1]

# the two innermost directories of the swath path give month and day
root_dir, filename = os.path.split(myd02_filename)
path_parts = root_dir.split("/")
month, day = path_parts[-2:]

# get time info
year, abs_day, hour, minute = get_file_time_info(myd02_filename)
save_name = "A{}.{}.{}{}.nc".format(year, abs_day, hour, minute)

# recursively check if the file already exists anywhere under save_dir
existing = next(Path(save_dir).rglob(save_name), None)
if existing is not None:
    raise FileExistsError("{} already exist. Not extracting it again.".format(save_name))

# input directories, organised per year under the data mount point
root_dir = "/mnt/modisaqua/{}/".format(year)
modis_data_dir = os.path.join(root_dir, "MODIS", "data")
myd03_dir, myd06_dir, myd35_dir = (
    os.path.join(modis_data_dir, product, "collection61", year, month, day)
    for product in ("MYD03", "MYD06_L2", "MYD35_L2")
)

cloudsat_lidar_dir = None
cloudsat_dir = os.path.join(root_dir, "CloudSat")

# extract training channels, validation channels, cloud mask, class occurrences if provided