def set_trigger_newerFTP(folder, file_time_dict):
    """Run LdataWriter for files in folder that are new or newer than recorded.

    Args:
        folder: directory to scan.
        file_time_dict: maps a file name to the time.localtime() value of
            its previously seen modification time.

    Returns:
        None. A folder that cannot be listed (OSError) is skipped silently.
    """
    pmu.force_register("Updating trigger folder " + folder)

    try:
        entries = os.listdir(folder)
    except OSError:
        return None

    for name in entries:
        # A candidate has an all-digit name whose last dot-separated piece
        # is not "gz"; everything else is only reported.
        if not (name.split(".")[-1] != "gz" and name.isdigit()):
            print(" ".join(("set_trigger_newerFTP ignores non NC file: ", name)))
            continue

        # Index 8 of the os.stat() result is the modification time.
        modified = time.localtime(os.stat(folder + "/" + name)[8])

        seen_before = name in file_time_dict
        if seen_before and not modified > file_time_dict[name]:
            continue

        stamp = time.strftime("%Y%m%d%H%M%S", modified)
        print(" ".join(("Found an updated file ", name, ", lastmod_date=", stamp)))
        if seen_before:
            print(" ".join(("And file: ", name, ", org_lastmod_date=",
                            time.strftime("%Y%m%d%H%M%S", file_time_dict[name]))))

        os.system("LdataWriter -dir " + opt_output_dir
                  + " -rpath " + name + " -dtype nc -ext nc -ltime " + stamp)

    pmu.force_register("Updated trigger folder " + folder)
    return
def set_trigger_newer(folder, threshold_time, date_str_flat):
    """Run LdataWriter for every ".nc" file in folder newer than threshold_time.

    Args:
        folder: directory to scan; a trailing path separator is optional.
        threshold_time: value compared against time.localtime(mtime) of each
            file; only files that compare greater get a trigger.
        date_str_flat: string used as the directory part of the "-rpath"
            argument and, with "000000" appended, as the "-ltime" argument
            (presumably a YYYYMMDD date -- confirm against callers).

    Returns:
        None. A folder that cannot be listed (OSError) is skipped silently.
    """
    pmu.force_register("Updating trigger folder " + folder)

    try:
        files = os.listdir(folder)
    except OSError:
        return None

    for f in files:
        if f.split(".")[-1] != "nc":
            print(" ".join(("set_trigger ignores non NC file: ", f)))
            continue

        # os.path.join gives the same result as plain concatenation when
        # folder ends with a separator, and a valid path when it does not.
        file_path = os.path.join(folder, f)
        info_file_path = date_str_flat + "/" + f

        # Index 8 of the os.stat() result is the modification time.
        lastmod_date = time.localtime(os.stat(file_path)[8])
        if lastmod_date > threshold_time:
            print(" ".join(("Found a new file", f)))
            command = "LdataWriter -dir " + opt_output_dir \
                + " -rpath " + info_file_path \
                + " -dtype nc -ext nc -ltime " + date_str_flat + "000000"
            os.system(command)

    pmu.force_register("Updated trigger folder " + folder)
    return
def set_trigger_newer(folder, threshold_time, date_str_flat):
    """Trigger LdataWriter for ".nc" files in folder modified after a threshold.

    Args:
        folder: directory to scan; it may or may not end with a separator.
        threshold_time: value compared against time.localtime(mtime) of each
            file; only files that compare greater get a trigger.
        date_str_flat: string used as the directory part of the "-rpath"
            argument and, with "000000" appended, as the "-ltime" argument
            (presumably a YYYYMMDD date -- confirm against callers).

    Returns:
        None. If the folder cannot be listed (OSError) nothing is done.
    """
    pmu.force_register("Updating trigger folder " + folder)

    try:
        names = os.listdir(folder)
    except OSError:
        return None

    for name in names:
        if name.split(".")[-1] == "nc":
            # Joining with os.path.join keeps paths correct whether or not
            # the caller passed a trailing separator on folder.
            stats = os.stat(os.path.join(folder, name))
            # Index 8 of the os.stat() result is the modification time.
            lastmod_date = time.localtime(stats[8])
            if lastmod_date > threshold_time:
                print(" ".join(("Found a new file", name)))
                rpath = date_str_flat + "/" + name
                command = "LdataWriter -dir " + opt_output_dir \
                    + " -rpath " + rpath \
                    + " -dtype nc -ext nc -ltime " + date_str_flat + "000000"
                os.system(command)
        else:
            print(" ".join(("set_trigger ignores non NC file: ", name)))

    pmu.force_register("Updated trigger folder " + folder)
    return
def set_trigger_newerFTP(folder, file_time_dict):
    """Issue an LdataWriter trigger for each new or updated file in folder.

    Args:
        folder: directory to scan.
        file_time_dict: maps a file name to the time.localtime() value of
            the modification time recorded for it earlier.

    Returns:
        None. If the folder cannot be listed (OSError) nothing is done.
    """
    pmu.force_register("Updating trigger folder " + folder)

    try:
        listing = os.listdir(folder)
    except OSError:
        return None

    time_format = "%Y%m%d%H%M%S"
    for entry in listing:
        wanted = entry.split(".")[-1] != "gz" and entry.isdigit()
        if not wanted:
            print(" ".join(("set_trigger_newerFTP ignores non NC file: ", entry)))
            continue

        # Index 8 of the os.stat() result is the modification time.
        current = time.localtime(os.stat(folder + "/" + entry)[8])

        is_known = entry in file_time_dict
        if is_known and not current > file_time_dict[entry]:
            # Already recorded and not modified since: no trigger needed.
            continue

        flat_time = time.strftime(time_format, current)
        print(" ".join(("Found an updated file ", entry, ", lastmod_date=", flat_time)))
        if is_known:
            previous = time.strftime(time_format, file_time_dict[entry])
            print(" ".join(("And file: ", entry, ", org_lastmod_date=", previous)))

        command = "LdataWriter -dir " + opt_output_dir \
            + " -rpath " + entry + " -dtype nc -ext nc -ltime " + flat_time
        os.system(command)

    pmu.force_register("Updated trigger folder " + folder)
    return
def pull_file(url, filename, output_dir, date_str_flat):
    """Download url/filename into output_dir with wget and trigger on change.

    The file is fetched with "wget -N". When wget succeeds and the local
    copy's modification time has advanced (or the file is new), and
    opt_generate_ldata_info is set, set_trigger() is called for it.

    Args:
        url: remote directory URL, without the trailing file name.
        filename: name of the file to fetch.
        output_dir: local directory the file is stored in.
        date_str_flat: directory component used in the trigger's rpath.

    Returns:
        None.
    """
    if opt_debug:
        print("Getting file <" + filename + ">")

    pmu.force_register("Getting file " + filename)

    # The parsed time is not used below. The call is kept because whether
    # it validates the name or has side effects is not visible from here.
    parse_time_from_filename(filename)

    local_filename = output_dir + '/' + filename
    if opt_debug:
        print("local_filename = " + local_filename)

    # Modification time of any existing local copy; 0 means "no copy yet".
    # Raw timestamps are compared instead of an int against a struct_time.
    org_mtime = 0
    if os.path.exists(local_filename):
        org_mtime = os.stat(local_filename)[8]

    #
    # Pull the data file to the destination directory.
    #
    pmu.force_register("Retrieving file " + filename)
    command = "wget -N -P" + output_dir + "/ " + url + "/" + filename
    print(command)
    # os.system reports failure through its return value, not an exception.
    if os.system(command) != 0:
        print(" ".join(("Error occurred while trying to transfer file: ", filename)))
        print("Skipping file!")
        return

    #
    # Set the trigger only when the data file was created or updated.
    #
    if os.path.exists(local_filename):
        new_mtime = os.stat(local_filename)[8]
        if org_mtime < new_mtime and opt_generate_ldata_info:
            rpath = date_str_flat + "/" + filename
            date_str = time.strftime("%Y%m%d%H%M%S", time.localtime(new_mtime))
            set_trigger(rpath, "nc", date_str)

    return
def push_file(input_filename):
    """Upload opt_input_dir/input_filename to opt_dest_dir on the FTP host.

    When opt_use_temp_file is set the file is sent under a name with a
    leading "_" and renamed afterwards, so the final name only appears
    once the transfer has finished.

    Args:
        input_filename: name of the file, relative to opt_input_dir.

    Returns:
        None. A socket error during the transfer is reported and the file
        is skipped.
    """
    if opt_debug:
        print(" ".join(("Pushing file ", input_filename)))

    pmu.force_register("Pushing file " + input_filename)

    #
    # Open the ftp connection
    #
    ftp = ftplib.FTP(opt_dest_host, opt_dest_user, opt_dest_pwd)
    if opt_passive:
        ftp.set_pasv(1)

    #
    # Move to the appropriate output directory
    #
    ftp.cwd(opt_dest_dir)

    pmu.force_register('Sending file ' + input_filename)

    if opt_use_temp_file:
        output_filename = "_" + input_filename
    else:
        output_filename = input_filename

    # storbinary sends raw bytes, so the source is opened in binary mode.
    datafile = open(opt_input_dir + '/' + input_filename, 'rb')
    try:
        ftp.storbinary('STOR ' + output_filename, datafile, 8192)
    except socket.error:
        print(" ".join(("Socket error occurred while trying to transfer file: ",
                        input_filename)))
        print("Skipping file!")
        # The connection state is unknown after a socket error, so drop it
        # without attempting a QUIT exchange.
        ftp.close()
        return
    finally:
        datafile.close()

    if opt_use_temp_file:
        ftp.rename(output_filename, input_filename)

    #
    # Close the FTP connection
    #
    ftp.quit()
def pull_file(url, filename, output_dir, date_str_flat):
    """Fetch one file with "wget -N" and set a trigger if it changed locally.

    Args:
        url: remote directory URL, without the trailing file name.
        filename: name of the file to fetch.
        output_dir: local directory the file is stored in.
        date_str_flat: directory component used in the trigger's rpath.

    Returns:
        None. When wget exits with a non-zero status the file is skipped
        and no trigger is written.
    """
    if opt_debug:
        print("Getting file <" + filename + ">")

    pmu.force_register("Getting file " + filename)

    # Result unused here. The call is retained because it may validate the
    # file name or have other effects that are not visible in this block.
    parse_time_from_filename(filename)

    local_filename = output_dir + '/' + filename
    if opt_debug:
        print("local_filename = " + local_filename)

    # Remember the modification time of a pre-existing copy so an actual
    # update can be detected after the download (0 = nothing there yet).
    had_copy = os.path.exists(local_filename)
    mtime_before = os.stat(local_filename)[8] if had_copy else 0

    #
    # Pull the data file to the destination directory.
    #
    pmu.force_register("Retrieving file " + filename)
    command = "wget -N -P" + output_dir + "/ " + url + "/" + filename
    print(command)
    status = os.system(command)
    if status != 0:
        # os.system never raises socket.error; failure shows up only in
        # the exit status.
        print(" ".join(("Error occurred while trying to transfer file: ", filename)))
        print("Skipping file!")
        return

    #
    # Set the trigger only when the data file was created or updated.
    #
    if not os.path.exists(local_filename):
        return

    mtime_after = os.stat(local_filename)[8]
    if mtime_before < mtime_after:
        if opt_generate_ldata_info:
            rpath = date_str_flat + "/" + filename
            date_str = time.strftime("%Y%m%d%H%M%S", time.localtime(mtime_after))
            set_trigger(rpath, "nc", date_str)

    return
def set_trigger(filename, extension, date_str):
    """Run LdataWriter for filename under opt_output_dir.

    Args:
        filename: path passed to LdataWriter as "-rpath".
        extension: passed as "-ext"; omitted when it is the empty string.
        date_str: passed as "-ltime"; omitted when it is the empty string.

    Returns:
        None.
    """
    pmu.force_register("Updating trigger file " + filename)

    print(" ".join(("create _latest data for file", filename)))

    pieces = ["LdataWriter -dir ", opt_output_dir,
              " -rpath ", filename,
              " -dtype ", opt_data_type]
    if extension != "":
        pieces.extend([" -ext ", extension])
    if date_str != "":
        pieces.extend([" -ltime ", date_str])
    command = "".join(pieces)

    # The command is echoed after it has been executed.
    os.system(command)
    print(command)

    pmu.force_register("Updated trigger filename " + filename)
    return
def pull_file_list(url, output_base_dir):
    """Download the index at url and return the links found in it.

    The index is saved as output_base_dir/index.xml with wget, parsed with
    Spider, and removed again.

    Args:
        url: URL of the remote index.
        output_base_dir: existing local directory used for the temporary
            index file.

    Returns:
        Whatever Spider.get_links() returns for the downloaded index, or
        None when wget exits with a non-zero status.
    """
    if opt_debug:
        print("Getting URL <" + url + ">")
        print("Putting dir <" + output_base_dir + ">")

    #
    # Pull the index file to the destination directory.
    #
    indexFile = output_base_dir + "/index.xml"

    pmu.force_register("Retrieving index from URL " + url)
    command = "wget -O " + indexFile + " " + url
    print(command)
    # os.system signals failure through its return value; it does not
    # raise socket.error.
    if os.system(command) != 0:
        print(" ".join(("Error occurred while trying to transfer URL: ", url)))
        print("Skipping file!")
        # A failed download can still leave an empty or partial index file.
        if os.path.exists(indexFile):
            os.unlink(indexFile)
        return
    pmu.force_register("Retrieved URL " + url)

    sp = Spider("file://" + indexFile)
    try:
        fileList = sp.get_links()
    finally:
        # Clean up even if parsing the index raises.
        os.unlink(indexFile)
        sp.close()

    pmu.force_register("Retrieved remote file list ")
    return fileList
def pull_file_list(url, output_base_dir):
    """Return the links listed in the remote index at url.

    wget stores the index as output_base_dir/index.xml; Spider extracts
    the links from that local copy, which is deleted afterwards.

    Args:
        url: URL of the remote index.
        output_base_dir: existing local directory used for the temporary
            index file.

    Returns:
        The value of Spider.get_links() for the downloaded index, or None
        when wget reports failure through a non-zero exit status.
    """
    if opt_debug:
        print("Getting URL <" + url + ">")
        print("Putting dir <" + output_base_dir + ">")

    index_path = output_base_dir + "/index.xml"

    #
    # Pull the index file to the destination directory.
    #
    pmu.force_register("Retrieving index from URL " + url)
    command = "wget -O " + index_path + " " + url
    print(command)
    status = os.system(command)
    if status != 0:
        # os.system cannot raise socket.error, so the exit status is the
        # only indication that the transfer failed.
        print(" ".join(("Error occurred while trying to transfer URL: ", url)))
        print("Skipping file!")
        # Do not leave an empty or partial index behind.
        if os.path.exists(index_path):
            os.unlink(index_path)
        return
    pmu.force_register("Retrieved URL " + url)

    spider = Spider("file://" + index_path)
    try:
        links = spider.get_links()
    finally:
        # Remove the index and release the spider even on a parse error.
        os.unlink(index_path)
        spider.close()

    pmu.force_register("Retrieved remote file list ")
    return links
def get_latest_file_time(folder):
    """Return the newest modification time among the ".nc" files in folder.

    Args:
        folder: directory to scan; a trailing path separator is optional.

    Returns:
        time.localtime() of the newest modification time found, or 0 when
        the folder holds no ".nc" file with a positive modification time.

    Raises:
        OSError: if the folder cannot be listed or a file cannot be stat'ed.
    """
    pmu.force_register("get latest time " + folder)

    files = os.listdir(folder)
    print("list files")

    # Track the raw timestamp and convert once at the end. This avoids
    # ordering the integer 0 against a struct_time and keeps the comparison
    # monotonic across daylight-saving changes.
    newest_mtime = 0
    for f in files:
        if f.split(".")[-1] == "nc":
            # os.path.join works with or without a trailing separator.
            mtime = os.stat(os.path.join(folder, f))[8]
            if newest_mtime < mtime:
                # Log text kept as-is; this branch is the one that records
                # f as the newest file seen so far.
                print(" ".join(("older than today", f)))
                newest_mtime = mtime
            else:
                print(" ".join(("newer than today", f)))
        else:
            print(" ".join(("non NC file: ", f)))

    current_time = time.localtime(newest_mtime) if newest_mtime else 0
    print(" ".join(("Newest file time=", str(current_time))))

    pmu.force_register("got latest time " + folder)
    return current_time
def pull_file_wget(url, output_base_dir):
    """Mirror the ".nc" files one level below url into output_base_dir.

    Runs wget with timestamping and recursion depth 1, accepting only the
    "nc" suffix.

    Args:
        url: remote directory URL.
        output_base_dir: local directory prefix handed to wget's -P option.

    Returns:
        None. A non-zero wget exit status is reported and the final
        "Retrieved URL" registration is skipped.
    """
    if opt_debug:
        print("Getting URL <" + url + ">")
        print("Putting dir <" + output_base_dir + ">")

    #
    # Pull the data files to the destination directory.
    #
    pmu.force_register("Retrieving URL " + url)
    command = "wget --timestamping -A nc -r -l1 -np --cut-dirs 6 -nH -P" \
        + output_base_dir + " " + url
    print(command)
    # os.system reports failure through its return value, not an exception.
    if os.system(command) != 0:
        print(" ".join(("Error occurred while trying to transfer URL: ", url)))
        print("Skipping file!")
        return

    pmu.force_register("Retrieved URL " + url)
    return
def pull_file_wget(url, output_base_dir):
    """Fetch the ".nc" files directly under url with a recursive wget.

    Args:
        url: remote directory URL.
        output_base_dir: local directory prefix handed to wget's -P option.

    Returns:
        None. When wget exits with a non-zero status an error is printed
        and the "Retrieved URL" registration is not made.
    """
    if opt_debug:
        print("Getting URL <" + url + ">")
        print("Putting dir <" + output_base_dir + ">")

    pmu.force_register("Retrieving URL " + url)

    #
    # Pull the data files to the destination directory.
    #
    wget_options = "--timestamping -A nc -r -l1 -np --cut-dirs 6 -nH"
    command = "wget " + wget_options + " -P" + output_base_dir + " " + url
    print(command)

    status = os.system(command)
    if status != 0:
        # The exit status is the only failure signal; os.system does not
        # raise socket.error.
        print(" ".join(("Error occurred while trying to transfer URL: ", url)))
        print("Skipping file!")
        return

    pmu.force_register("Retrieved URL " + url)
    return
def get_latest_file_time(folder):
    """Find the most recent modification time of the ".nc" files in folder.

    Args:
        folder: directory to scan; it may or may not end with a separator.

    Returns:
        time.localtime() of the newest modification time found, or 0 when
        the folder holds no ".nc" file with a positive modification time.

    Raises:
        OSError: if the folder cannot be listed or a file cannot be stat'ed.
    """
    pmu.force_register("get latest time " + folder)

    names = os.listdir(folder)
    print("list files")

    # Compare raw timestamps. The struct_time is built only for the return
    # value, so the integer 0 is never ordered against a struct_time.
    latest = 0
    for name in names:
        if name.split(".")[-1] != "nc":
            print(" ".join(("non NC file: ", name)))
            continue

        # os.path.join yields a valid path whether or not folder ends with
        # a separator.
        mtime = os.stat(os.path.join(folder, name))[8]
        if latest < mtime:
            # Log wording preserved; this is the branch where name becomes
            # the newest file seen so far.
            print(" ".join(("older than today", name)))
            latest = mtime
        else:
            print(" ".join(("newer than today", name)))

    if latest:
        current_time = time.localtime(latest)
    else:
        current_time = 0
    print(" ".join(("Newest file time=", str(current_time))))

    pmu.force_register("got latest time " + folder)
    return current_time
def pull_file(filename):
    """Retrieve filename over the module-level ftp connection.

    The file is downloaded to a hidden temporary file in opt_temp_dir,
    gunzipped when its name ends in ".gz", copied into opt_output_dir and,
    if opt_write_Ldata is set, announced with LdataWriter. Files that are
    already present locally (as-is, gzipped or gunzipped) are skipped.

    Args:
        filename: name of the remote file in the current FTP directory.

    Returns:
        None. Transfer and copy failures are reported and the file is
        skipped.
    """
    pmu.auto_register("Checking for new files")

    #
    # Construct the local filenames. The temporary filename is used for
    # pulling the file down; the result is copied to the local filename
    # once the retrieval is complete.
    #
    temp_filename = opt_temp_dir + '/.' + filename
    local_filename = opt_output_dir + '/' + filename

    if opt_debug:
        print("local_filename = " + local_filename)
        print("temp_filename = " + temp_filename)

    #
    # Check to see if we already have this file, either under its own
    # name, with ".gz" appended, or (for a file gzipped on the ftp site but
    # unzipped locally) with the ".gz" suffix removed.
    #
    local_gz_filename = local_filename + ".gz"
    local_base, local_ext = os.path.splitext(local_filename)
    already_present = (
        (local_ext == ".gz" and os.path.exists(local_base))
        or os.path.exists(local_filename)
        or os.path.exists(local_gz_filename)
    )
    if already_present:
        if opt_debug:
            print("")
            print("File already exists locally: " + local_filename)
            print("Skipping retrieval....")
        return

    if opt_debug:
        print("Getting file <" + filename + ">")

    #
    # Wait for the file to be quiescent: poll the remote size until two
    # consecutive readings agree.
    #
    if opt_wait_for_quiescence:
        prev_file_size = 0
        file_size = ftp.size(filename)
        while file_size != prev_file_size:
            if opt_debug:
                print("Waiting for file quiescence...")
            time.sleep(opt_quiescence_secs)
            prev_file_size = file_size
            file_size = ftp.size(filename)

    #
    # Pull the data file into the temporary file.
    #
    temp_file = open(temp_filename, 'wb')
    pmu.force_register("Retrieving file " + filename)
    try:
        try:
            ftp.retrbinary('RETR ' + filename, temp_file.write)
        finally:
            # Close the handle on success and on failure alike.
            temp_file.close()
    except socket.error:
        print(" ".join(("Socket error occurred while trying to transfer file: ",
                        filename)))
        print("Skipping file!")
        # Do not leave a partial download behind.
        try:
            os.remove(temp_filename)
        except OSError:
            print("Error removing temp file\n")
        return

    if os.path.splitext(temp_filename)[1] == ".gz":
        filename = os.path.splitext(filename)[0]
        os.system("gunzip " + temp_filename)
        temp_filename = os.path.splitext(temp_filename)[0]
        local_filename = os.path.splitext(local_filename)[0]

    # shutil.copyfile raises IOError under Python 2; EnvironmentError is
    # the common base of IOError and OSError, so it covers both.
    copied = True
    try:
        shutil.copyfile(temp_filename, local_filename)
    except EnvironmentError:
        copied = False
        print("Error Copying temp file to local file\n")

    # Only announce files that actually reached the output directory.
    if copied and opt_write_Ldata:
        file_stats = os.stat(local_filename)
        data_file_time = datetime.fromtimestamp(file_stats[8])
        Ldata_command = "LdataWriter -dir " + opt_output_dir \
            + " -ext " + opt_Ldata_ext \
            + " -dtype " + opt_Ldata_dtype \
            + " -ltime " + data_file_time.strftime("%Y%m%d%H%M%S") \
            + " -rpath ./" + filename
        print("")
        print("LdataWriter command line:")
        print(Ldata_command)
        print("")
        os.system(Ldata_command)

    try:
        os.remove(temp_filename)
    except OSError:
        print("Error removing temp file\n")

    return
def push_file(input_ldata):
    """Upload the data file described by input_ldata to the FTP destination.

    The file is sent under a name with a leading "_" and renamed after the
    transfer, so the final name only appears once the upload is complete.
    When opt_push_ldata is set, opt_input_dir/_latest_data_info is uploaded
    the same way.

    Args:
        input_ldata: object providing data_path(dir), data_subdir() and
            data_filename() for the file to send.

    Returns:
        None. A file that no longer exists locally is skipped.
    """
    input_path = input_ldata.data_path(opt_input_dir)

    if opt_debug:
        print(" ".join(("Pushing file ", input_path)))

    #
    # Make sure the file exists -- it could have been scrubbed.
    #
    try:
        os.stat(input_path)
    except os.error:
        if opt_debug:
            print("*** File doesn't exist, skipping")
        return

    #
    # Open the ftp connection
    #
    ftp = ftplib.FTP(opt_dest_host, opt_dest_user, opt_dest_pwd)
    try:
        if opt_passive:
            ftp.set_pasv(1)

        #
        # Move to the appropriate output directory
        #
        ftp.cwd(opt_dest_dir)

        #
        # Get the subdirectory and filename for the output file.
        #
        subdir = input_ldata.data_subdir()
        filename = input_ldata.data_filename()

        if opt_debug:
            print(" ".join(("subdir = ", subdir)))
            print(" ".join(("filename = ", filename)))

        #
        # Put the output file in the appropriate directory structure.
        # NOTE(review): SOURCE lost its indentation. The mkd/cwd step is
        # placed in the non-flat branch because the later cwd('..') is
        # guarded by "not opt_flat_output" -- confirm.
        #
        if opt_flat_output:
            output_filename = subdir + '_' + filename
        else:
            output_filename = filename

            # Create the subdirectory and move to it. A permission error
            # from mkd is ignored because it means the subdir exists.
            try:
                ftp.mkd(subdir)
            except ftplib.error_perm:
                pass

            ftp.cwd(subdir)

        #
        # Push the data file to the destination machine.
        #
        pmu.force_register('Sending file ' + input_path)

        # storbinary sends raw bytes, so open the source in binary mode.
        datafile = open(input_path, 'rb')
        try:
            ftp.storbinary('STOR _' + output_filename, datafile, 8192)
        finally:
            datafile.close()
        ftp.rename('_' + output_filename, output_filename)

        if opt_push_ldata:
            if not opt_flat_output:
                ftp.cwd('..')

            ldata_path = opt_input_dir + '/_latest_data_info'
            pmu.force_register('Sending file ' + ldata_path)
            ldatafile = open(ldata_path, 'r')
            try:
                ftp.storlines('STOR __latest_data_info', ldatafile)
            finally:
                ldatafile.close()
            ftp.rename('__latest_data_info', '_latest_data_info')
    finally:
        #
        # Close the FTP connection, also when a step above raised.
        #
        ftp.close()
def push_file(input_ldata):
    """Upload the ".data" and ".indx" files named after input_ldata's subdir.

    Both files are read from opt_input_dir and sent to opt_dest_dir on the
    FTP host under a name with a leading "_", then renamed so the final
    names only appear after the transfer. When opt_push_ldata is set,
    opt_input_dir/_latest_data_info is uploaded the same way.

    Args:
        input_ldata: object whose data_subdir() gives the base name of the
            two files.

    Returns:
        None.
    """
    #
    # Open the ftp connection
    #
    ftp = ftplib.FTP(opt_dest_host, opt_dest_user, opt_dest_pwd)
    try:
        if opt_passive:
            ftp.set_pasv(1)

        #
        # Move to the appropriate output directory
        #
        ftp.cwd(opt_dest_dir)

        #
        # Get the filenames for the output files.
        #
        data_filename = input_ldata.data_subdir() + '.data'
        indx_filename = input_ldata.data_subdir() + '.indx'

        if opt_debug:
            print(" ".join(("data_filename = ", data_filename)))
            print(" ".join(("indx_filename = ", indx_filename)))

        #
        # Push the files to the destination machine.
        #
        pmu.force_register('Sending file ' + data_filename)

        for remote_name in (data_filename, indx_filename):
            local_path = opt_input_dir + '/' + remote_name
            if opt_debug:
                print(" ".join(("Sending file ", local_path)))

            # storbinary sends raw bytes, so open the source in binary
            # mode and close it even if the transfer raises.
            source = open(local_path, 'rb')
            try:
                ftp.storbinary('STOR _' + remote_name, source, 8192)
            finally:
                source.close()
            ftp.rename('_' + remote_name, remote_name)

        if opt_push_ldata:
            ldata_path = opt_input_dir + '/_latest_data_info'
            pmu.force_register('Sending file ' + ldata_path)
            ldatafile = open(ldata_path, 'r')
            try:
                ftp.storlines('STOR __latest_data_info', ldatafile)
            finally:
                ldatafile.close()
            ftp.rename('__latest_data_info', '_latest_data_info')
    finally:
        #
        # Close the FTP connection, also when a step above raised.
        #
        ftp.close()
def pull_file_ftp():
    """Mirror opt_source_dir from the FTP source host into opt_output_dir.

    Before running wget, the output directory is scanned. The all-digit
    base names of local ".gz" files are collected into a wget reject list,
    and the modification times of the other files (except those starting
    with "_") are recorded. After a successful wget run, and if
    opt_generate_ldata_info is set, set_trigger_newerFTP() is called with
    the recorded times.

    Returns:
        None. A non-zero wget exit status is reported and no triggers are
        written.
    """
    #
    # Create the output directory
    #
    create_dir(opt_output_dir)

    pmu.force_register("Listing file from output dir:" + opt_output_dir)

    #
    # Construct a list of all gz files (used by wget to reject them).
    #
    files = os.listdir(opt_output_dir)
    print("list files")
    file_time_dict = {}
    reject_names = []
    for f in files:
        if f.split(".")[-1] == "gz":
            zfile = f.split(".")[0]
            if opt_debug:
                print(" ".join(("gzipped file: ", zfile)))
            # isdigit must be called; the bare method object is always
            # true, which put every gz base name on the reject list.
            if zfile.isdigit():
                reject_names.append(zfile)
        else:
            if opt_debug:
                print(" ".join(("A non gz file: ", f)))
            if f.startswith("_"):
                if opt_debug:
                    print(" ".join(("A file starts with _: ", f)))
            else:
                # Index 8 of the os.stat() result is the modification time.
                stats = os.stat(opt_output_dir + "/" + f)
                file_time_dict[f] = time.localtime(stats[8])
    rejectNameList = ",".join(reject_names)

    #
    # Pull the data files to the destination directory.
    #
    pmu.force_register("Retrieving file from remote dir: " + opt_source_dir)

    # Normalise the remote directory to "/path/to/dir/".
    formattedSourceDir = "/" + opt_source_dir
    if opt_source_dir.startswith("/"):
        formattedSourceDir = opt_source_dir
    if not formattedSourceDir.endswith("/"):
        formattedSourceDir = formattedSourceDir + "/"
    numCutDir = formattedSourceDir.count("/") - 1

    # NOTE(review): the user/password expressions were redacted to "******"
    # in the reviewed text. opt_source_user / opt_source_pwd are assumed by
    # analogy with opt_dest_user / opt_dest_pwd -- confirm the real names.
    command = "wget -N -r -l1 --ftp-user=" + opt_source_user \
        + " --ftp-password=" + opt_source_pwd \
        + " --cut-dirs " + str(numCutDir) \
        + " -nH -P" + opt_output_dir + "/ ftp://" + opt_source_host \
        + formattedSourceDir
    if rejectNameList != "":
        command = command + " -R " + rejectNameList
    if opt_source_ext != "":
        command = command + " -A " + opt_source_ext
    print(command)

    # os.system reports failure through its return value, not an exception.
    if os.system(command) != 0:
        print(" ".join(("Error occurred while trying to transfer file: ",
                        formattedSourceDir)))
        print("Skipping file!")
        return

    #
    # Set trigger files only for data files that were updated.
    #
    if opt_generate_ldata_info:
        set_trigger_newerFTP(opt_output_dir, file_time_dict)

    return
def pull_file_ftp():
    """Fetch the contents of opt_source_dir over FTP with a recursive wget.

    The output directory is scanned first. All-digit base names of local
    ".gz" files go on wget's reject list, and the modification time of
    every other file not starting with "_" is recorded. When wget succeeds
    and opt_generate_ldata_info is set, set_trigger_newerFTP() receives the
    recorded times.

    Returns:
        None. When wget exits with a non-zero status an error is printed
        and no triggers are written.
    """
    #
    # Create the output directory
    #
    create_dir(opt_output_dir)

    pmu.force_register("Listing file from output dir:" + opt_output_dir)

    #
    # Construct a list of all gz files (used by wget to reject them).
    #
    print_debug = bool(opt_debug)
    entries = os.listdir(opt_output_dir)
    print("list files")

    file_time_dict = {}
    rejected = []
    for entry in entries:
        pieces = entry.split(".")
        if pieces[-1] == "gz":
            zfile = pieces[0]
            if print_debug:
                print(" ".join(("gzipped file: ", zfile)))
            # Call isdigit(); without the parentheses the test was always
            # true and non-numeric names were rejected as well.
            if zfile.isdigit():
                rejected.append(zfile)
            continue

        if print_debug:
            print(" ".join(("A non gz file: ", entry)))
        if entry.startswith("_"):
            if print_debug:
                print(" ".join(("A file starts with _: ", entry)))
            continue

        # Index 8 of the os.stat() result is the modification time.
        mtime = os.stat(opt_output_dir + "/" + entry)[8]
        file_time_dict[entry] = time.localtime(mtime)

    rejectNameList = ",".join(rejected)

    #
    # Pull the data files to the destination directory.
    #
    pmu.force_register("Retrieving file from remote dir: " + opt_source_dir)

    # Make the remote directory start and end with "/".
    if opt_source_dir.startswith("/"):
        formattedSourceDir = opt_source_dir
    else:
        formattedSourceDir = "/" + opt_source_dir
    if not formattedSourceDir.endswith("/"):
        formattedSourceDir = formattedSourceDir + "/"
    numCutDir = formattedSourceDir.count("/") - 1

    # NOTE(review): the credential expressions appear only as "******" in
    # the reviewed text. opt_source_user / opt_source_pwd are assumed from
    # the opt_dest_user / opt_dest_pwd naming -- confirm before merging.
    command = "wget -N -r -l1 --ftp-user=" + opt_source_user \
        + " --ftp-password=" + opt_source_pwd \
        + " --cut-dirs " + str(numCutDir) \
        + " -nH -P" + opt_output_dir + "/ ftp://" + opt_source_host \
        + formattedSourceDir
    if rejectNameList != "":
        command = command + " -R " + rejectNameList
    if opt_source_ext != "":
        command = command + " -A " + opt_source_ext
    print(command)

    status = os.system(command)
    if status != 0:
        # The exit status is the only failure signal from os.system.
        print(" ".join(("Error occurred while trying to transfer file: ",
                        formattedSourceDir)))
        print("Skipping file!")
        return

    #
    # Set trigger files only for data files that were updated.
    #
    if opt_generate_ldata_info:
        set_trigger_newerFTP(opt_output_dir, file_time_dict)

    return
def pull_file(filename):
    """Download filename over the module-level ftp connection.

    The remote file is written to a hidden temporary file in opt_temp_dir,
    gunzipped when its name ends in ".gz", and copied into opt_output_dir.
    If opt_write_Ldata is set, LdataWriter is run for the copied file.
    Nothing is done when the file already exists locally, either as-is,
    gzipped or gunzipped.

    Args:
        filename: name of the remote file in the current FTP directory.

    Returns:
        None. Socket errors and copy failures are printed and the file is
        skipped.
    """
    pmu.auto_register("Checking for new files")

    #
    # Construct the local filenames. The temporary filename is used for
    # pulling the file down; the result is copied to the local filename
    # once the retrieval is complete.
    #
    temp_filename = opt_temp_dir + '/.' + filename
    local_filename = opt_output_dir + '/' + filename

    if opt_debug:
        print("local_filename = " + local_filename)
        print("temp_filename = " + temp_filename)

    #
    # Check to see if we already have this file. If we do, then skip it.
    # A file gzipped on the ftp site may exist locally in unzipped form.
    #
    stripped_name, suffix = os.path.splitext(local_filename)
    have_unzipped = suffix == ".gz" and os.path.exists(stripped_name)
    have_same = os.path.exists(local_filename)
    have_gzipped = os.path.exists(local_filename + ".gz")
    if have_unzipped or have_same or have_gzipped:
        if opt_debug:
            print("")
            print("File already exists locally: " + local_filename)
            print("Skipping retrieval....")
        return

    if opt_debug:
        print("Getting file <" + filename + ">")

    #
    # Wait for the file to be quiescent: keep polling the remote size
    # until it stops changing between two readings.
    #
    if opt_wait_for_quiescence:
        prev_file_size = 0
        file_size = ftp.size(filename)
        while file_size != prev_file_size:
            if opt_debug:
                print("Waiting for file quiescence...")
            time.sleep(opt_quiescence_secs)
            prev_file_size = file_size
            file_size = ftp.size(filename)

    #
    # Pull the data file into the temporary file.
    #
    temp_file = open(temp_filename, 'wb')
    pmu.force_register("Retrieving file " + filename)
    transfer_ok = True
    try:
        ftp.retrbinary('RETR ' + filename, temp_file.write)
    except socket.error:
        transfer_ok = False
    finally:
        # The handle is closed whether or not the transfer succeeded.
        temp_file.close()

    if not transfer_ok:
        print(" ".join(("Socket error occurred while trying to transfer file: ",
                        filename)))
        print("Skipping file!")
        # Remove the partial download.
        try:
            os.remove(temp_filename)
        except OSError:
            print("Error removing temp file\n")
        return

    if os.path.splitext(temp_filename)[1] == ".gz":
        filename = os.path.splitext(filename)[0]
        os.system("gunzip " + temp_filename)
        temp_filename = os.path.splitext(temp_filename)[0]
        local_filename = os.path.splitext(local_filename)[0]

    # Under Python 2 shutil.copyfile fails with IOError, which
    # "except OSError" does not catch; EnvironmentError covers both.
    try:
        shutil.copyfile(temp_filename, local_filename)
    except EnvironmentError:
        print("Error Copying temp file to local file\n")
        copy_ok = False
    else:
        copy_ok = True

    # LdataWriter is only run for a file that was actually delivered.
    if copy_ok and opt_write_Ldata:
        file_stats = os.stat(local_filename)
        data_file_time = datetime.fromtimestamp(file_stats[8])
        Ldata_command = "LdataWriter -dir " + opt_output_dir \
            + " -ext " + opt_Ldata_ext \
            + " -dtype " + opt_Ldata_dtype \
            + " -ltime " + data_file_time.strftime("%Y%m%d%H%M%S") \
            + " -rpath ./" + filename
        print("")
        print("LdataWriter command line:")
        print(Ldata_command)
        print("")
        os.system(Ldata_command)

    try:
        os.remove(temp_filename)
    except OSError:
        print("Error removing temp file\n")

    return