def read_status(dirname, my_hash):
    '''
    Loads the JSON status document stored at "<dirname>/<my_hash>-status.json".

    Returns the decoded JSON content, or None when the file does not exist
    (errno 2), in which case a level-2 debug message is emitted. Any other
    IOError is re-raised to the caller.
    '''
    file_path = dirname + "/" + my_hash + "-status.json"
    try:
        with open(file_path, 'r') as handle:
            return json.load(handle)
    except IOError as err:
        # errno 2 means the file is missing; anything else is unexpected.
        if err.errno != 2:
            raise
        debug(2, "Status file {} not found".format(file_path))
    return None
def read_status(dirname, my_hash):
    '''
    Reads the job status stored as JSON in "<dirname>/<my_hash>-status.json".

    The decoded JSON value is returned. A missing file (errno 2) is not an
    error: it is reported through debug() at level 2 and None is returned.
    Every other IOError propagates unchanged.
    '''
    file_path = dirname + "/" + my_hash + "-status.json"
    result = None
    try:
        with open(file_path, 'r') as source:
            result = json.load(source)
    except IOError as err:
        file_is_missing = err.errno == 2
        if not file_is_missing:
            raise
        debug(2, u"Status file {} not found".format(file_path))
    return result
def update_status(self, progress):
    '''
    Reports extraction progress for the current series.

    progress is the number of rows handled so far (it is divided by
    self.numline). At most once every 60 time.clock() units the overall
    percentage is recomputed and written through the module-level
    update_status(); a remaining-time estimate is only produced while the
    last series is being processed, otherwise -1 is passed on.
    '''
    # NOTE(review): the original source carried no indentation; the final
    # level-5 debug call is assumed to sit outside the 60-unit throttle.
    throttle_elapsed = time.clock() > self.last_time + 60
    if throttle_elapsed:
        self.last_time = time.clock()
        details = self.status_details
        starting_percentage = 94.0 / details['num_series'] * details['current_series'] + 1
        series_share = (progress / float(self.numline) * 75 + 19) / details['num_series']
        percentage = int(round(series_share + starting_percentage))
        debug(3, "Overall progress: {}%".format(percentage))

        minutes_remaining = -1
        if details['current_series'] == details['num_series'] - 1:
            elapsed = time.clock() - self.start_time
            minutes_remaining = int(round(elapsed / progress * (self.numline - progress) / 60))
            debug(3, "Remaining: {} mins".format(minutes_remaining))

        update_status(details['dirname'],
                      details['my_hash'],
                      Plot_status.extracting,
                      percentage=percentage,
                      minutes_remaining=minutes_remaining)

    extracted = round(progress / float(self.numline) * 100, 3)
    debug(5, "Extracting: {}%".format(extracted))
def getFiles(self, slices_in_range, max_slices):
    '''
    Downloads the coverage for slices_in_range in batches of at most
    max_slices time slices and returns the list of local NetCDF file names.

    slices_in_range is a sequence of objects providing strftime();
    max_slices is the number of slices requested per WCS call and must be
    greater than zero. Each request is cached on disk under a name derived
    from the MD5 of its GetCoverage URL, so a request whose file already
    exists is not downloaded again.

    A download is written to a uniquely named temporary file, validated by
    opening it with netCDF.Dataset, and only then renamed to its final name.
    A file that fails validation (RuntimeError) is deleted and the request
    is repeated.
    NOTE(review): the retry loop has no upper bound, as in the original.
    '''
    files = []
    total_requests = int(math.ceil(len(slices_in_range) / float(max_slices)))
    next_start = 0
    for i in range(total_requests):
        start = slices_in_range[next_start].strftime('%Y-%m-%d %H:%M:%S')
        # The last batch may be shorter than max_slices.
        end_index = min(next_start + max_slices, len(slices_in_range)) - 1
        end = slices_in_range[end_index].strftime('%Y-%m-%d %H:%M:%S')
        next_start = next_start + max_slices

        extract_dates = start + '/' + end

        wcs_extractor = WCSRawHelper(self.wcs_url, extract_dates, self.extract_variable,
                                     self.extract_area, self.extract_depth)

        # Generate the file name based on the request URL
        fname = self.outdir + hashlib.md5(wcs_extractor.generateGetCoverageUrl()).hexdigest() + ".nc"

        if not os.path.isfile(fname):
            # If the same request hasn't been downloaded before
            download_complete = False
            while not download_complete:
                if plotting:
                    debug(3, "Making request {} of {}".format(i + 1, total_requests))
                data = wcs_extractor.getData()

                # Generate a temporary file name to download to
                fname_temp = self.outdir + str(uuid.uuid4()) + ".nc"

                if plotting:
                    debug(3, "Starting download {} of {}".format(i + 1, total_requests))
                # Download in 16K chunks. This is most efficient for speed and RAM usage.
                chunk_size = 16 * 1024
                # NetCDF is a binary format, so the file must be opened in binary mode.
                with open(fname_temp, 'wb') as outfile:
                    while True:
                        chunk = data.read(chunk_size)
                        if not chunk:
                            break
                        outfile.write(chunk)

                try:
                    # Opening the file is the validity check; close the handle straight away.
                    netCDF.Dataset(fname_temp).close()
                    download_complete = True
                except RuntimeError:
                    if plotting:
                        debug(3, "Download is corrupt. Retrying...")
                    # Each retry uses a new temporary name, so remove the corrupt file.
                    os.remove(fname_temp)
            # Rename the file after it's finished downloading
            os.rename(fname_temp, fname)

        if plotting:
            self.update_status(i + 1, total_requests)
        files.append(fname)
    return files
def update_status(self, progress):
    '''
    Publishes the extraction progress of the series currently being processed.

    progress is the count of rows completed so far, measured against
    self.numline. The status file is refreshed through the module-level
    update_status() no more often than every 60 time.clock() units. The
    minutes-remaining figure is estimated only for the final series; for
    earlier series -1 is reported.
    '''
    # NOTE(review): indentation was lost in the original source; the closing
    # level-5 debug call is assumed to run on every invocation.
    if time.clock() > self.last_time + 60:
        self.last_time = time.clock()

        num_series = self.status_details['num_series']
        current_series = self.status_details['current_series']

        starting_percentage = 94.0 / num_series * current_series + 1
        percentage = int(round(
            (progress / float(self.numline) * 75 + 19) / num_series + starting_percentage))
        debug(3, "Overall progress: {}%".format(percentage))

        on_last_series = current_series == num_series - 1
        if not on_last_series:
            minutes_remaining = -1
        else:
            seconds_per_row = (time.clock() - self.start_time) / progress
            minutes_remaining = int(round(seconds_per_row * (self.numline - progress) / 60))
            debug(3, "Remaining: {} mins".format(minutes_remaining))

        update_status(
            self.status_details['dirname'],
            self.status_details['my_hash'],
            Plot_status.extracting,
            percentage=percentage,
            minutes_remaining=minutes_remaining)

    debug(5, "Extracting: {}%".format(round(progress / float(self.numline) * 100, 3)))
def getData(self):
    '''
    Fetches the coverage description, works out which time slices are in
    range and downloads them, returning the list of downloaded file names.

    An empty list is returned when no time slice is in range, or when the
    download still fails with an HTTPError after four attempts. After each
    HTTPError the number of slices requested per call is halved, but never
    below one, before trying again.
    '''
    if plotting:
        debug(2, "Getting coverage description...")
    coverage_description = self.getCoverageDescriptionData()

    max_slices = self.getMaxSlices(coverage_description['offset_vectors'])
    slices_in_range = self.getSlicesInRange(coverage_description['time_slices'])

    files = []
    if slices_in_range:
        retries = 0
        if plotting:
            debug(2, "Getting files...")
        while not files and retries < 4:
            try:
                files = self.getFiles(slices_in_range, max_slices)
            except urllib.error.HTTPError:
                # Floor division keeps the batch size an integer (it is used
                # as a list index in getFiles); the lower bound of 1 prevents
                # a zero batch size, which getFiles would divide by.
                max_slices = max(1, max_slices // 2)
                retries += 1
    else:
        if plotting:
            debug(2, "No time slices in range.")
    return files
def getData(self):
    '''
    Fetches the coverage description, works out which time slices are in
    range and downloads them, returning the list of downloaded file names.

    An empty list is returned when no time slice is in range, or when the
    download still fails with an HTTPError after four attempts. After each
    HTTPError the number of slices requested per call is halved, but never
    below one, before trying again.
    '''
    if plotting:
        debug(2, "Getting coverage description...")
    coverage_description = self.getCoverageDescriptionData()

    max_slices = self.getMaxSlices(coverage_description['offset_vectors'])
    slices_in_range = self.getSlicesInRange(coverage_description['time_slices'])

    files = []
    if slices_in_range:
        retries = 0
        if plotting:
            debug(2, "Getting files...")
        while not files and retries < 4:
            try:
                files = self.getFiles(slices_in_range, max_slices)
            except urllib2.HTTPError:
                # Halving a batch size of 1 used to give 0, which getFiles
                # then divided by; keep at least one slice per request.
                max_slices = max(1, max_slices // 2)
                retries += 1
    else:
        if plotting:
            debug(2, "No time slices in range.")
    return files
def process(self):
    '''
    Looks up, for every row of the CSV file at self._csv, the value of
    self.variable at the grid cell and time step closest to that row.

    The CSV rows are read through the 'Latitude', 'Longitude' and 'Date'
    columns (plus 'data_point' when self.matchup is set). 'Date' is parsed
    as "%d/%m/%Y %H:%M:%S", falling back to "%d/%m/%Y %H:%M".

    Returns a list with one dict per row holding track_date, data_date,
    track_lat, track_lon and data_value (and match_value when self.matchup
    is set). data_value is a float, or the string "null" when the value is
    NaN. A value is forced to NaN when the selected pixel is further from
    the track point than half a pixel diagonal, or when the closest time
    step is more than twice the average time interval away.

    Side effects: sets self.numline, self.start_time and self.last_time, and
    calls self.update_status() per row when plotting and self.status_details
    are both set.
    '''
    if plotting:
        debug(2, "Extracting...")

    netcdf_file = netCDF.MFDataset(self.files, aggdim='time')
    try:
        time_var = netcdf_file.variables['time']
        data_var = netcdf_file.variables[self.variable]

        times = time_var[:]

        if time_var.units:
            times = [
                datetime.datetime.strptime(
                    netCDF.num2date(x, time_var.units, calendar='standard').isoformat(),
                    "%Y-%m-%dT%H:%M:%S")
                for x in times
            ]
        else:
            # the time variable doesn't have units; this can be caused by a thredds aggregation that uses dateFormatMark
            # to grab the date from the filename, in which case the date is an array of strings
            times = [
                datetime.datetime.strptime(x.tostring(), "%Y-%m-%dT%H:%M:%SZ")
                for x in times
            ]

        # Read the CSV once; the line count (header included) drives the progress figures.
        with open(self._csv, "rb") as csvfile:
            csv_lines = csvfile.read().splitlines()
        self.numline = len(csv_lines)

        data = csv.DictReader(csv_lines, delimiter=',')

        ret = []
        times_sorted_indexes = np.argsort(times)
        times_sorted = np.sort(times)
        average_time_interval = (times_sorted[-1] - times_sorted[0]) / len(set(times_sorted))

        lat_var = getCoordinateVariable(netcdf_file, "Lat")[:]
        lon_var = getCoordinateVariable(netcdf_file, "Lon")[:]

        # Bring coordinates above the usual bounds back into range.
        if np.amax(lat_var) > 90:
            for i, lat in enumerate(lat_var):
                if lat > 90:
                    lat_var[i] = lat - 180

        if np.amax(lon_var) > 180:
            for i, lon in enumerate(lon_var):
                if lon > 180:
                    lon_var[i] = lon - 360

        # NOTE(review): with a single-point axis lat_end/lon_end is 0 and the
        # division below is by zero, exactly as in the original; confirm how
        # calculateDistance() treats the resulting offset.
        lat_end = len(lat_var) - 1
        lat_offset = (lat_var[lat_end] - lat_var[0]) / lat_end
        lon_end = len(lon_var) - 1
        lon_offset = (lon_var[lon_end] - lon_var[0]) / lon_end

        # Calculate the distance from the centre of a pixel to a corner
        offset_distance = calculateDistance(0, 0, lat_offset, lon_offset) / 2

        self.start_time = time.clock()
        self.last_time = time.clock()

        for row in data:
            # Always parse the coordinates: they are needed for the distance
            # check below even when an axis has a single point.
            current_lat = float(row['Latitude'])
            current_lon = float(row['Longitude'])

            if len(lat_var) <= 1:
                lat_index = 0
            else:
                t_lat = current_lat - lat_var[0]
                lat_index = int(round(abs(t_lat / lat_offset)))

            if len(lon_var) <= 1:
                lon_index = 0
            else:
                t_lon = current_lon - lon_var[0]
                lon_index = int(round(abs(t_lon / lon_offset)))

            try:
                track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y %H:%M:%S")
            except ValueError:
                track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y %H:%M")

            time_index = find_closest(times_sorted, track_date,
                                      arr_indexes=times_sorted_indexes,
                                      time=True, arr_sorted=True)

            # Clamp indices that fall beyond the end of the grid.
            if lat_index > lat_end:
                lat_index = lat_end
            if lon_index > lon_end:
                lon_index = lon_end

            # Calculate the distance from the desired point to the centre of the chosen pixel
            distance_from_desired = calculateDistance(current_lat, current_lon,
                                                      lat_var[lat_index], lon_var[lon_index])

            if distance_from_desired > offset_distance:
                # If the distance is greater than the offset distance then something has gone wrong
                # and the wrong pixel has been chosen.
                # Set the value to NaN to avoid returning an incorrect result
                data_value = float('nan')
                if plotting:
                    message = (
                        "Incorrect pixel selected! Pixel at {:+07.3f}, {:+08.3f} is further than {:6.2f}km "
                        "from point at {:+07.3f}, {:+08.3f} ({:8.2f}km). Setting {} value to NaN."
                    )
                    debug(0, message.format(lat_var[lat_index], lon_var[lon_index], offset_distance,
                                            current_lat, current_lon, distance_from_desired,
                                            self.variable))
            elif abs(times[time_index] - track_date) > (2 * average_time_interval):
                data_value = float('nan')
            elif len(data_var.dimensions) == 4:
                # If the file has a depth variable, use the first depth
                data_value = data_var[time_index][0][lat_index][lon_index]
            else:
                data_value = data_var[time_index][lat_index][lon_index]

            _ret = {}
            _ret['track_date'] = track_date.isoformat()
            if time_var.units:
                _ret['data_date'] = netCDF.num2date(
                    time_var[time_index], time_var.units, calendar='standard').isoformat()
            else:
                _ret['data_date'] = time_var[time_index].tostring()
            if self.matchup:
                _ret['match_value'] = row['data_point']
            _ret['track_lat'] = row['Latitude']
            _ret['track_lon'] = row['Longitude']
            _ret['data_value'] = float(data_value) if not np.isnan(float(data_value)) else "null"
            ret.append(_ret)
            if plotting and self.status_details:
                self.update_status(len(ret))
        return ret
    finally:
        # Release the underlying NetCDF files whether or not extraction succeeded.
        netcdf_file.close()
def process(self):
    '''
    Looks up, for every row of the CSV file at self._csv, the value of
    self.variable at the grid cell and time step closest to that row.

    The CSV rows are read through the 'Latitude', 'Longitude' and 'Date'
    columns (plus 'data_point' when self.matchup is set). 'Date' must match
    "%d/%m/%Y %H:%M".

    Returns a list with one dict per row holding track_date, data_date,
    track_lat, track_lon and data_value (and match_value when self.matchup
    is set). data_value is a float, or the string "null" when the value is
    NaN.

    Side effects: sets self.numline, self.start_time and self.last_time, and
    calls self.update_status() per row when plotting and self.status_details
    are both set.
    '''
    if plotting:
        debug(2, "Extracting...")

    netcdf_file = netCDF.MFDataset(self.files, aggdim='time')
    try:
        time_var = netcdf_file.variables['time']
        data_var = netcdf_file.variables[self.variable]

        times = time_var[:]

        if time_var.units:
            times = [
                datetime.datetime.strptime(
                    netCDF.num2date(x, time_var.units, calendar='standard').isoformat(),
                    "%Y-%m-%dT%H:%M:%S")
                for x in times
            ]
        else:
            # the time variable doesn't have units; this can be caused by a thredds aggregation that uses dateFormatMark
            # to grab the date from the filename, in which case the date is an array of strings
            times = [
                datetime.datetime.strptime(x.tostring(), "%Y-%m-%dT%H:%M:%SZ")
                for x in times
            ]

        # Read the CSV once; the line count (header included) drives the progress figures.
        with open(self._csv, "rb") as csvfile:
            csv_lines = csvfile.read().splitlines()
        self.numline = len(csv_lines)

        data = csv.DictReader(csv_lines, delimiter=',')

        ret = []
        times_sorted_indexes = np.argsort(times)
        times_sorted = np.sort(times)

        lat_var = getCoordinateVariable(netcdf_file, "Lat")[:]
        lon_var = getCoordinateVariable(netcdf_file, "Lon")[:]

        # Bring coordinates above the usual bounds back into range.
        if np.amax(lat_var) > 90:
            for i, lat in enumerate(lat_var):
                if lat > 90:
                    lat_var[i] = lat - 180

        if np.amax(lon_var) > 180:
            for i, lon in enumerate(lon_var):
                if lon > 180:
                    lon_var[i] = lon - 360

        # NOTE(review): with a single-point axis lat_end/lon_end is 0 and the
        # division below is by zero, exactly as in the original.
        lat_end = len(lat_var) - 1
        lat_offset = (lat_var[lat_end] - lat_var[0]) / lat_end
        lon_end = len(lon_var) - 1
        lon_offset = (lon_var[lon_end] - lon_var[0]) / lon_end

        self.start_time = time.clock()
        self.last_time = time.clock()

        for row in data:
            if len(lat_var) <= 1:
                lat_index = 0
            else:
                current_lat = float(row['Latitude'])
                t_lat = current_lat - lat_var[0]
                lat_index = int(round(abs(t_lat / lat_offset)))

            if len(lon_var) <= 1:
                lon_index = 0
            else:
                current_lon = float(row['Longitude'])
                t_lon = current_lon - lon_var[0]
                lon_index = int(round(abs(t_lon / lon_offset)))

            track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y %H:%M")
            time_index = find_closest(times_sorted, track_date,
                                      arr_indexes=times_sorted_indexes,
                                      time=True, arr_sorted=True)

            # Clamp indices that fall beyond the end of the grid. The longitude
            # index is bounded by lon_end (it was previously set to lat_end).
            if lat_index > lat_end:
                lat_index = lat_end
            if lon_index > lon_end:
                lon_index = lon_end

            # Kept in its own name so the CSV reader bound to `data` is not shadowed.
            if len(data_var.dimensions) == 4:
                # If the file has a depth variable, use the first depth
                data_value = data_var[time_index][0][lat_index][lon_index]
            else:
                data_value = data_var[time_index][lat_index][lon_index]

            _ret = {}
            _ret['track_date'] = track_date.isoformat()
            if time_var.units:
                _ret['data_date'] = netCDF.num2date(
                    time_var[time_index], time_var.units, calendar='standard').isoformat()
            else:
                _ret['data_date'] = time_var[time_index].tostring()
            if self.matchup:
                _ret['match_value'] = row['data_point']
            _ret['track_lat'] = row['Latitude']
            _ret['track_lon'] = row['Longitude']
            _ret['data_value'] = float(data_value) if not np.isnan(float(data_value)) else "null"
            ret.append(_ret)
            if plotting and self.status_details:
                self.update_status(len(ret))
        return ret
    finally:
        # Release the underlying NetCDF files whether or not extraction succeeded.
        netcdf_file.close()
def update_status(self, progress, total_requests):
    '''
    Records download progress for the current series in the status file.

    progress is the number of requests finished so far out of
    total_requests. The resulting overall percentage is handed to the
    module-level update_status() with the state Plot_status.extracting and
    then logged at debug level 3.
    '''
    details = self.status_details
    series_count = details['num_series']

    starting_percentage = 94.0 / series_count * details['current_series'] + 1
    download_share = progress / float(total_requests) * 19 / series_count
    percentage = int(round(download_share + starting_percentage))

    update_status(
        details['dirname'],
        details['my_hash'],
        Plot_status.extracting,
        percentage=percentage)
    debug(3, "Overall progress: {}%".format(percentage))
def process(self):
    '''
    Looks up, for every row of the CSV file at self._csv, the value of
    self.variable at the grid cell and time step closest to that row.

    The CSV rows are read through the 'Latitude', 'Longitude' and 'Date'
    columns (plus 'data_point' when self.matchup is set). 'Date' is parsed
    as "%d/%m/%Y %H:%M:%S", then "%d/%m/%Y %H:%M", then "%d/%m/%Y".

    Returns a list with one dict per row holding track_date, data_date,
    track_lat, track_lon and data_value (and match_value when self.matchup
    is set). data_value is a float, or the string "null" when the value is
    NaN. A value is forced to NaN when the selected pixel is further from
    the track point than half a pixel diagonal, or when the closest time
    step is more than twice the average time interval away.

    Side effects: sets self.numline, self.start_time and self.last_time, and
    calls self.update_status() per row when plotting and self.status_details
    are both set.
    '''
    if plotting:
        debug(2, "Extracting...")

    netcdf_file = netCDF.MFDataset(self.files, aggdim='time')
    try:
        time_var = netcdf_file.variables['time']
        data_var = netcdf_file.variables[self.variable]

        times = time_var[:]

        if time_var.units:
            times = [
                datetime.datetime.strptime(
                    netCDF.num2date(x, time_var.units, calendar='standard').isoformat(),
                    "%Y-%m-%dT%H:%M:%S")
                for x in times
            ]
        else:
            # the time variable doesn't have units; this can be caused by a thredds aggregation that uses dateFormatMark
            # to grab the date from the filename, in which case the date is an array of strings
            times = [
                datetime.datetime.strptime(x.tostring(), "%Y-%m-%dT%H:%M:%SZ")
                for x in times
            ]

        # Read the CSV once; the line count (header included) drives the progress figures.
        with open(self._csv, "rb") as csvfile:
            csv_lines = csvfile.read().splitlines()
        self.numline = len(csv_lines)

        data = csv.DictReader(csv_lines, delimiter=',')

        ret = []
        times_sorted_indexes = np.argsort(times)
        times_sorted = np.sort(times)
        average_time_interval = (times_sorted[-1] - times_sorted[0]) / len(set(times_sorted))

        lat_var = getCoordinateVariable(netcdf_file, "Lat")[:]
        lon_var = getCoordinateVariable(netcdf_file, "Lon")[:]

        # Bring coordinates above the usual bounds back into range.
        if np.amax(lat_var) > 90:
            for i, lat in enumerate(lat_var):
                if lat > 90:
                    lat_var[i] = lat - 180

        if np.amax(lon_var) > 180:
            for i, lon in enumerate(lon_var):
                if lon > 180:
                    lon_var[i] = lon - 360

        # NOTE(review): with a single-point axis lat_end/lon_end is 0 and the
        # division below is by zero, exactly as in the original; confirm how
        # calculateDistance() treats the resulting offset.
        lat_end = len(lat_var) - 1
        lat_offset = (lat_var[lat_end] - lat_var[0]) / lat_end
        lon_end = len(lon_var) - 1
        lon_offset = (lon_var[lon_end] - lon_var[0]) / lon_end

        # Calculate the distance from the centre of a pixel to a corner
        offset_distance = calculateDistance(0, 0, lat_offset, lon_offset) / 2

        self.start_time = time.clock()
        self.last_time = time.clock()

        for row in data:
            # Always parse the coordinates: they are needed for the distance
            # check below even when an axis has a single point.
            current_lat = float(row['Latitude'])
            current_lon = float(row['Longitude'])

            if len(lat_var) <= 1:
                lat_index = 0
            else:
                t_lat = current_lat - lat_var[0]
                lat_index = int(round(abs(t_lat / lat_offset)))

            if len(lon_var) <= 1:
                lon_index = 0
            else:
                t_lon = current_lon - lon_var[0]
                lon_index = int(round(abs(t_lon / lon_offset)))

            # Accept dates with seconds, without seconds, or without a time part.
            try:
                track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y %H:%M:%S")
            except ValueError:
                try:
                    track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y %H:%M")
                except ValueError:
                    track_date = datetime.datetime.strptime(row['Date'], "%d/%m/%Y")

            time_index = find_closest(times_sorted, track_date,
                                      arr_indexes=times_sorted_indexes,
                                      time=True, arr_sorted=True)

            # Clamp indices that fall beyond the end of the grid.
            if lat_index > lat_end:
                lat_index = lat_end
            if lon_index > lon_end:
                lon_index = lon_end

            # Calculate the distance from the desired point to the centre of the chosen pixel
            distance_from_desired = calculateDistance(current_lat, current_lon,
                                                      lat_var[lat_index], lon_var[lon_index])

            if distance_from_desired > offset_distance:
                # If the distance is greater than the offset distance then something has gone wrong
                # and the wrong pixel has been chosen.
                # Set the value to NaN to avoid returning an incorrect result
                data_value = float('nan')
                if plotting:
                    message = (
                        "Incorrect pixel selected! Pixel at {:+07.3f}, {:+08.3f} is further than {:6.2f}km "
                        "from point at {:+07.3f}, {:+08.3f} ({:8.2f}km). Setting {} value to NaN."
                    )
                    debug(0, message.format(lat_var[lat_index], lon_var[lon_index], offset_distance,
                                            current_lat, current_lon, distance_from_desired,
                                            self.variable))
            elif abs(times[time_index] - track_date) > (2 * average_time_interval):
                data_value = float('nan')
            elif len(data_var.dimensions) == 4:
                # If the file has a depth variable, use the first depth
                data_value = data_var[time_index][0][lat_index][lon_index]
            else:
                data_value = data_var[time_index][lat_index][lon_index]

            _ret = {}
            _ret['track_date'] = track_date.isoformat()
            if time_var.units:
                _ret['data_date'] = netCDF.num2date(
                    time_var[time_index], time_var.units, calendar='standard').isoformat()
            else:
                _ret['data_date'] = time_var[time_index].tostring()
            if self.matchup:
                _ret['match_value'] = row['data_point']
            _ret['track_lat'] = row['Latitude']
            _ret['track_lon'] = row['Longitude']
            _ret['data_value'] = float(data_value) if not np.isnan(float(data_value)) else "null"
            ret.append(_ret)
            if plotting and self.status_details:
                self.update_status(len(ret))
        return ret
    finally:
        # Release the underlying NetCDF files whether or not extraction succeeded.
        netcdf_file.close()
def update_status(dirname, my_hash, plot_status, message="", percentage=0, traceback="", base_url="", minutes_remaining=-1):
    '''
    Rewrites the JSON status file "<dirname>/<my_hash>-status.json" and
    returns the status dict that was written.

    The existing file content is used as the starting point unless the file
    is missing (errno 2) or plot_status is Plot_status.initialising, in
    which case a fresh status record is used. message, traceback and state
    are always overwritten. The complete and failed states force
    percentage 100 and minutes_remaining 0; complete additionally fills in
    the plot and csv paths, and csv_url when base_url is given. Any other
    state stores the supplied percentage and minutes_remaining.
    '''
    file_path = dirname + "/" + my_hash + "-status.json"

    fresh_status = {
        'percentage': 0,
        'state': plot_status,
        'message': message,
        'completed': False,
        'traceback': traceback,
        'job_id': my_hash,
        'minutes_remaining': -1,
    }

    # Start from what is on disk, unless the job is only just starting
    # or nothing has been written for it yet.
    try:
        with open(file_path, 'r') as existing:
            if plot_status == Plot_status.initialising:
                status = fresh_status
            else:
                status = json.load(existing)
    except IOError as err:
        if err.errno != 2:
            raise
        debug(2, "Status file {} not found".format(file_path))
        status = fresh_status

    status["message"] = message
    status["traceback"] = traceback
    status["state"] = plot_status

    csv_url = None
    if plot_status == Plot_status.complete:
        completed, shown_percentage, shown_minutes = True, 100, 0
        plot_path = dirname + "/" + my_hash + "-plot.html"
        csv_path = dirname + "/" + my_hash + ".zip"
        if base_url:
            csv_url = base_url + "/" + my_hash + ".zip"
    elif plot_status == Plot_status.failed:
        completed, shown_percentage, shown_minutes = True, 100, 0
        plot_path = None
        csv_path = None
    else:
        completed, shown_percentage, shown_minutes = False, percentage, minutes_remaining
        plot_path = None
        csv_path = None

    status["completed"] = completed
    status['percentage'] = shown_percentage
    status['minutes_remaining'] = shown_minutes
    status['filename'] = plot_path
    status['csv'] = csv_path
    if csv_url is not None:
        status['csv_url'] = csv_url

    debug(4, "Status: {}".format(status))

    # Persist the updated record.
    with open(file_path, 'w') as target:
        json.dump(status, target)

    return status
def update_status(dirname, my_hash, plot_status, message="", percentage=0, traceback="", base_url="", minutes_remaining=-1):
    '''
    Updates the job status stored in "<dirname>/<my_hash>-status.json" and
    returns the dict that was saved.

    A fresh record is used when the file does not exist yet (errno 2) or
    when plot_status is Plot_status.initialising; otherwise the record on
    disk is loaded and amended. message, traceback and state always take
    the supplied values. For the complete and failed states the job is
    marked completed with percentage 100 and minutes_remaining 0; complete
    also stores the plot and csv paths, and csv_url when base_url is set.
    All other states store the supplied percentage and minutes_remaining.
    '''
    status_path = dirname + "/" + my_hash + "-status.json"

    def new_record():
        # The record used when no earlier status should be carried over.
        return dict(
            percentage=0,
            state=plot_status,
            message=message,
            completed=False,
            traceback=traceback,
            job_id=my_hash,
            minutes_remaining=-1,
        )

    initial_status = new_record()

    try:
        with open(status_path, 'r') as status_file:
            if plot_status == Plot_status.initialising:
                status = initial_status
            else:
                status = json.load(status_file)
    except IOError as err:
        if err.errno != 2:
            raise
        debug(2, u"Status file {} not found".format(status_path))
        # Nothing on disk yet, so begin with the initial record.
        status = initial_status

    status["message"] = message
    status["traceback"] = traceback
    status["state"] = plot_status

    if plot_status == Plot_status.complete:
        status["completed"] = True
        status['percentage'] = 100
        status['minutes_remaining'] = 0
        status['filename'] = dirname + "/" + my_hash + "-plot.html"
        status['csv'] = dirname + "/" + my_hash + ".zip"
        if base_url:
            status['csv_url'] = base_url + "/" + my_hash + ".zip"
    else:
        has_failed = plot_status == Plot_status.failed
        status["completed"] = True if has_failed else False
        status['percentage'] = 100 if has_failed else percentage
        status['minutes_remaining'] = 0 if has_failed else minutes_remaining
        status['filename'] = None
        status['csv'] = None

    debug(4, u"Status: {}".format(status))

    # Save the amended record back to the same file.
    with open(status_path, 'w') as status_file:
        json.dump(status, status_file)

    return status