def combine(bats_file, event_sum_file):
    """Combine the given BATS .dpr file with the summary event.log file
    so that the DataFile contains most of the information from both.

    """
    # It is pretty much given that the data is CTD.
    lat, lng = bats_file.globals['LATITUDE'], bats_file.globals['LONGITUDE']

    # Find the event log record.
    epsilon = Decimal('1e-3')
    sum_file_i = None
    for i in range(len(event_sum_file)):
        sumlat = event_sum_file['LATITUDE'][i]
        sumlng = event_sum_file['LONGITUDE'][i]
        close_enough = equal_with_epsilon(lat, sumlat, epsilon) and \
            equal_with_epsilon(lng, sumlng, epsilon)
        if close_enough:
            sum_file_i = i
            break
    if sum_file_i is None:
        log.error('Event for BATS data at %f %f not found' % (lat, lng))
        return

    headers = event_sum_file.column_headers()
    row = event_sum_file.row(sum_file_i)
    info = dict(zip(headers, row))

    bats_file.globals['DEPTH'] = info['DEPTH']
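
# A standalone sketch (hypothetical, not the module's API) of the position
# match used by combine() above: two records are treated as the same event
# when latitude and longitude each agree to within 1e-3 degrees, roughly
# 100 m of latitude.
def _find_event_sketch(lat, lng, positions, epsilon=1e-3):
    """Return the index of the first (lat, lng) pair within epsilon
    degrees of the given position, or None if there is no match."""
    for i, (plat, plng) in enumerate(positions):
        if abs(lat - plat) < epsilon and abs(lng - plng) < epsilon:
            return i
    return None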
def salinity_back_from_in_situ_density(temp, salty, press, stpned):
    '''Back-calculate salinity using in situ density.

    Args:
        temp -- in situ temperature.
        salty -- a first guess at salinity.
        press -- pressure in decibars.
        stpned -- the sigma stp at the point in question, calculated as
                  stp = sigma_r(p, p, t, s).

    Return:
        sal -- the resultant back-calculated salinity.

    '''
    # Check for missing parameters.
    if _any_missing(temp, salty, stpned, press):
        return CDMISS

    plus = 0.04
    for i in range(1, 20):
        # Calculate STP at the guess salinity.
        stpcal = sigma_r(press, press, temp, salty)
        # Add a little bit and calculate it again.
        salx = salty + plus
        stpx = sigma_r(press, press, temp, salx)
        # Figure the gradient.
        grad = plus / (stpx - stpcal)
        # Figure how much to add to the guessed salinity.
        saladd = grad * (stpned - stpcal)
        salnew = salty + saladd
        if abs(salnew) < 1000.0:
            # Guess isn't too ridiculous.
            if equal_with_epsilon(salnew, salty, TOLER):
                # Successive salinity approximations are very close. Stop.
                return salnew
            else:
                salty = salnew
        else:
            print (' Numerical problem in salinity_back_from_in_situ_density. '
                   'Setting sal to missing.')
            return CDMISS
    if equal_with_epsilon(stpned, stpcal, TOLER):
        # Resultant sigma is close enough.
        sal = (salty + salnew) / 2.0
        print ' averaging sal.=%f' % sal
        return sal
    else:
        return CDMISS
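
# A standalone sketch (not part of the module) of the secant-style iteration
# used above, with a made-up linear equation of state so the example runs
# without the real sigma_r(). The gradient plus / (stpx - stpcal)
# approximates dS/d(sigma), and each step moves the salinity guess by
# gradient * (target - current).
def _back_calculate(target_sigma, toy_sigma, guess=34.5, plus=0.04,
                    toler=1e-7, max_iter=20):
    for _ in range(max_iter):
        current = toy_sigma(guess)
        gradient = plus / (toy_sigma(guess + plus) - current)
        new_guess = guess + gradient * (target_sigma - current)
        if abs(new_guess - guess) < toler:
            return new_guess
        guess = new_guess
    return None

# e.g. with the toy relation sigma(S) = 0.78 * S,
# _back_calculate(27.0, lambda s: 0.78 * s) converges to ~34.615.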
def simplest_str(s):
    """Give the simplest string representation.

    If a float is almost equivalent to an integer, swap out for the
    integer.

    """
    if type(s) is float:
        if fns.equal_with_epsilon(s, int(s)):
            s = int(s)
    return str(s)
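
# Hypothetical usage (these exact calls are not in the source):
#     simplest_str(3.0)   -> '3'
#     simplest_str(3.25)  -> '3.25'
#     simplest_str('abc') -> 'abc'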
def sigma_r(refprs, press, temp, salty):
    '''Calculate density using the international equation of state.

    From text furnished by J. Gieskes.

    Args:
        refprs -- reference pressure
                  refprs = 0.   : sigma theta
                  refprs = press: sigma z
        press -- pressure in decibars
        temp -- temperature in degrees Celsius
        salty -- salinity PSS-78

    Return:
        kg/m**3 - 1000.0

    '''
    # Check for missing data.
    if _any_missing(temp, press, salty):
        return CDMISS

    # Calculate potential temperature.
    if press != refprs:
        potemp = potential_temperature(press, temp, salty, refprs)
    else:
        potemp = temp

    # Sigma theta kg/m**3
    sigma = (rho_w(potemp) + kw(potemp, salty) + k_st0(salty, potemp) +
             _decimal(4.8314e-4) * salty ** 2)
    if equal_with_epsilon(refprs, 0.0):
        return sigma - _decimal(1000.0)

    # Calculate the pressure effect:
    #
    #     rho(s,t,0) / (1.0 - p / k(s,t,p))
    #
    kst0 = secant_bulk_modulus(abs(salty), potemp, 0)
    # Reference pressure in bars
    bars = refprs * 0.1

    # Calculate pressure terms.
    terma = polynomial(potemp, (3.239908, 0.00143713, 1.16092e-4, -5.77905e-7)) + \
        polynomial(potemp, (0.0022838, -1.0981e-5, -1.6078e-6)) * salty + \
        1.91075e-4 * abs(salty) ** 1.5
    termb = polynomial(potemp, (8.50935e-5, -6.12293e-6, 5.2787e-8)) + \
        polynomial(potemp, (-9.9348e-7, 2.0816e-8, 9.1697e-10)) * salty

    # Secant bulk modulus k(s,t,p)
    kstp = polynomial(bars, (kst0, terma, termb))

    return sigma / (1.0 - bars / kstp) - 1000.0
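
# A minimal standalone sketch (not part of the module) of the pressure
# correction used above: in situ density is the surface density divided by
# (1 - p/K), where K is the secant bulk modulus in bars. The sample numbers
# below are illustrative assumptions, not validated EOS-80 values.
def _pressure_corrected_density(rho0, bars, kstp):
    """Return in situ density given surface density rho0 (kg/m**3),
    pressure in bars, and secant bulk modulus kstp (bars)."""
    return rho0 / (1.0 - bars / kstp)

# e.g. 1028 kg/m**3 at the surface, 400 bars, K assumed ~23500 bars:
# _pressure_corrected_density(1028.0, 400.0, 23500.0) -> ~1045.8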
def fp_eq(a, b):
    return equal_with_epsilon(a, b, 1e-5)
def salinity(press, potemp, sig):
    '''Calculate salinity using the international equation of state by
    back-calculating the sigma theta portion of sigma_r().

    From text furnished by J. Gieskes.

    Args:
        press -- pressure in decibars
        potemp -- potential temperature in degrees Celsius
        sig -- kg/m**3 - 1000.0

    Return:
        sal -- salinity PSS-78

    '''
    if _any_missing(press, sig, potemp):
        return CDMISS

    # Calculate density at the given salinity, temperature, and
    # pressure = 0.0. First approximation.
    salty = 34.5
    rhow = rho_w(potemp)
    kw_ = kw(potemp, salty)
    for i in range(1, 20):
        # Get the derivative of kw with respect to salinity.
        dkwdsl = kw_ / salty + 0.824493
        kst0 = k_st0(salty, potemp)
        # Get the derivative of kst0.
        dkst0 = kst0 / abs(salty) ** 1.5 * 1.5 * sqrt(salty)
        # Sigma theta kg/m**3
        sigma = rhow + kw_ + kst0 + 4.8314e-4 * salty ** 2
        # Get the derivative of sigma theta.
        dsig0 = dkwdsl + dkst0 + 9.6628e-4 * salty
        # Get the next approximation to salinity.
        sigma -= 1000.0
        f = sigma - sig
        dfdt = dsig0
        salnew = salty - f / dfdt
        if abs(salnew) < 2000.0:
            # Guess isn't too ridiculous.
            if equal_with_epsilon(salnew, salty, TOLER):
                # Successive salinity approximations are very close. Stop.
                return salnew
            else:
                salty = salnew
        else:
            print ' Numerical problem in salinity. Setting sal to missing.'
            return CDMISS
    if abs(f) < TOLER:
        # Resultant sigma is close enough.
        sal = (salty + salnew) / 2.0
        print ' averaging sal.=%f' % sal
        return sal
    else:
        return CDMISS
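
# A standalone sketch (not part of the module) of the Newton iteration used
# above, with a made-up quadratic sigma(S) so it runs without the real
# rho_w/kw/k_st0. Each step is S_new = S - f(S)/f'(S) where
# f(S) = sigma(S) - target.
def _newton_salinity(target, sigma, dsigma, guess=34.5, toler=1e-7):
    for _ in range(20):
        f = sigma(guess) - target
        new_guess = guess - f / dsigma(guess)
        if abs(new_guess - guess) < toler:
            return new_guess
        guess = new_guess
    return None

# e.g. with the toy relation sigma(S) = 0.02 * S**2,
# _newton_salinity(24.0, lambda s: 0.02 * s ** 2, lambda s: 0.04 * s)
# converges to sqrt(1200) ~ 34.641.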
def _missing(x):
    return equal_with_epsilon(x, CDMISS)
def read(self, handle):
    """How to read CTD Bonus Goodhope files from a TAR."""
    lines = handle.readlines()

    line0 = lines[0].split()
    sect_id = line0[1]
    station = str(int(line0[0]))

    line3 = lines[3].split()
    lattoks = [line3[3], line3[4], line3[2]]
    lontoks = [line3[6], line3[7], line3[5]]
    try:
        latitude = ddm_to_dd(lattoks)
    except ValueError:
        latitude = ddm_to_dd([lattoks[1], lattoks[2], lattoks[0]])
    try:
        longitude = ddm_to_dd(lontoks)
    except ValueError:
        longitude = ddm_to_dd([lontoks[1], lontoks[2], lontoks[0]])
    date = line3[0]
    time = line3[1].zfill(4)
    depth = line3[10]

    self.globals['EXPOCODE'] = None
    self.globals['SECT_ID'] = sect_id
    self.globals['STNNBR'] = station
    self.globals['CASTNO'] = '1'
    self.globals['LATITUDE'] = latitude
    self.globals['LONGITUDE'] = longitude
    self.globals['DEPTH'] = depth
    self.globals['_DATETIME'] = datetime.strptime(date + time, '%d%m%Y%H%M')

    param_units = [
        ['CTDPRS', 'DBAR'],
        ['CTDTMP', 'ITS-90'],
        ['CTDSAL', 'PSS-78'],
        ['CTDOXY', 'UMOL/KG'],
        ['THETA', 'DEG C'],
        ['DEPTH', 'METERS'],
        ['SIG0', 'KG/M^3'],
        ['GAMMA', 'KG/M^3'],
    ]
    columns = [p for p, u in param_units]
    units = [u for p, u in param_units]
    self.create_columns(columns, units)

    data = lines[14:]
    for l in data:
        for i, v in enumerate(map(float, l.split())):
            v = _decimal(v)
            flag_woce = 2
            # 9.0 marks missing data (WOCE flag 9).
            if equal_with_epsilon(v, 9.0):
                v = None
                flag_woce = 9
            self[columns[i]].append(v, flag_woce=flag_woce)

    self.check_and_replace_parameters()
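
# A minimal sketch (an assumption, not the libcchdo implementation) of the
# degrees-decimal-minutes conversion that read() relies on. It assumes
# tokens ordered [degrees, minutes, hemisphere]; read() retries with a
# rotated token order because some files put the hemisphere token first,
# and float() raising ValueError on the hemisphere letter triggers that
# retry.
def _ddm_to_dd_sketch(toks):
    degrees, minutes, hemisphere = toks
    dd = float(degrees) + float(minutes) / 60.0
    if hemisphere.upper() in ('S', 'W'):
        dd = -dd
    return dd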
def australian_navy_ctd(args):
    """Download and convert Australian Navy CTD data."""
    from pydap.client import open_url
    from libcchdo.thredds import crawl
    from libcchdo.formats.ctd.zip import exchange as ctdzipex
    from libcchdo.formats.zip import write as zwrite

    dfcs = []

    cf_param_to_cchdo_param = {
        'sea_water_pressure': 'CTDPRS',
        'sea_water_temperature': 'CTDTMP',
        'sea_water_practical_salinity': 'CTDSAL',
    }
    ignored_qc_flags = [
        'time_qc_flag',
        'position_qc_flag',
    ]
    qc_conventions = {
        'Proposed IODE qc scheme March 2012': {
            1: 2,   # good
            2: 5,   # not_evaluated_or_unknown
            3: 3,   # suspect
            4: 4,   # bad
            9: 9,   # missing
        },
    }

    dfc = DataFileCollection()
    catalog = "http://www.metoc.gov.au/thredds/catalog/RAN_CTD_DATA/catalog.xml"
    for url in crawl(catalog):
        df = DataFile()

        log.info(u'Reading %s', url)
        dset = open_url(url)
        vars = dset.keys()
        for vname in vars:
            var = dset[vname]
            attrs = var.attributes
            if 'standard_name' in attrs:
                std_name = attrs['standard_name']
                if std_name == 'time':
                    df.globals['_DATETIME'] = \
                        datetime(1950, 1, 1) + timedelta(var[:])
                elif std_name == 'latitude':
                    df.globals['LATITUDE'] = var[:]
                elif std_name == 'longitude':
                    df.globals['LONGITUDE'] = var[:]
                elif std_name in cf_param_to_cchdo_param:
                    cparam = cf_param_to_cchdo_param[std_name]
                    if '_FillValue' in attrs:
                        fill_value = attrs['_FillValue']
                        values = []
                        for x in var[:]:
                            if equal_with_epsilon(x, fill_value):
                                values.append(None)
                            else:
                                values.append(x)
                    else:
                        values = var[:]
                    try:
                        df[cparam].values = values
                    except KeyError:
                        df[cparam] = Column(cparam)
                        df[cparam].values = values
                elif 'status_flag' in std_name:
                    flagged_param = std_name.replace('status_flag', '').strip()
                    cparam = cf_param_to_cchdo_param[flagged_param]
                    qc_convention = attrs['quality_control_convention']
                    if qc_convention in qc_conventions:
                        qc_map = qc_conventions[qc_convention]
                        df[cparam].flags_woce = [qc_map[x] for x in var[:]]
                else:
                    log.debug('unhandled standard_name %s', std_name)
            elif ('long_name' in attrs and
                    attrs['long_name'] == 'profile identifier'):
                profile_id = var[:]
                cruise_id = profile_id / 10 ** 4
                profile_id = profile_id - cruise_id * 10 ** 4
                df.globals['EXPOCODE'] = str(cruise_id)
                df.globals['STNNBR'] = str(profile_id)
                df.globals['CASTNO'] = str(1)
            elif vname in ignored_qc_flags:
                df.globals['_' + vname] = var[:]
            elif (vname.endswith('whole_profile_flag') or
                    vname.endswith('sd_test')):
                pass
            else:
                log.debug('unhandled variable %s', vname)

        # Attach the new file to the appropriate collection.
        if dfc.files:
            if dfc.files[0].globals['EXPOCODE'] != df.globals['EXPOCODE']:
                dfcs.append(dfc)
                dfc = DataFileCollection()
        dfc.append(df)

    with closing(args.output) as out_file:
        # Keep the fallback counter in a mutable container so the closure
        # can update it (Python 2 has no nonlocal; a bare int here would
        # raise UnboundLocalError on increment).
        next_id = [0]

        def get_filename(dfc):
            try:
                return '{0}_ct1.zip'.format(dfc.files[0].globals['EXPOCODE'])
            except IndexError:
                next_id[0] += 1
                return '{0}_ct1.zip'.format(next_id[0])

        zwrite(dfcs, out_file, ctdzipex, get_filename)
def test_equal_with_epsilon(self):
    self.assertTrue(fns.equal_with_epsilon(1, 1 + 1e-7))
    self.assertFalse(fns.equal_with_epsilon(1, 1 + 1e-5))
    self.assertFalse(fns.equal_with_epsilon(1, 1 + 1e-7, 1e-7))
    self.assertTrue(fns.equal_with_epsilon(1, 1 + 1e-7, 1e-6))
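
# A reference sketch consistent with the assertions above (an assumption;
# the real fns.equal_with_epsilon may differ in detail). The default
# epsilon of 1e-6 is inferred from the first two asserts, and the
# comparison appears to be strict (<), since 1 vs 1 + 1e-7 is NOT equal
# when epsilon is exactly 1e-7.
def equal_with_epsilon_sketch(a, b, epsilon=1e-6):
    return abs(a - b) < epsilon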
def _decimal_check_missing(string):
    """Convert string to a Decimal, or None if it matches a dpr fill value."""
    x = _decimal(string)
    if equal_with_epsilon(x, -9.99) or equal_with_epsilon(x, -10):
        return None
    return x
def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result for this method is a special case. The bottle file
    format contains the entire BATS holdings while the internal data format
    splits data up by cruises. Because cruises for timeseries are split by
    file for cruise, the end result is a dictionary with cruise_ids as keys
    to DataFileCollections (cruises) containing DataFiles (casts).

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON for the note in the Variables list stating DON is reported
    # for TON prior to BATS 121.
    parameters.append(['DON', None, 'umol/kg'])

    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    s = None
    for i, (var, d, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only want to add parameters after Depth. The others were done
        # manually.
        if s is None:
            continue
        try:
            var = bats_to_param[var]
        except KeyError:
            pass
        columns.append(var)
        units.append(u)

    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    for sec, lines in sections.items():
        if sec == 'Variables list':
            continue
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    df = None
    params_auto = parameters[s:]
    dfi = 0
    for i, l in enumerate(handle):
        parts = l.split()

        id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        if (df is None or df.globals['_OS_ID'] != cruise_id or
                df.globals['STNNBR'] != cruise_type or
                df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                log.info(u'finalizing cast {0} {1} {2}'.format(
                    df.globals['_OS_ID'], df.globals['STNNBR'],
                    df.globals['CASTNO']))
                try:
                    meta = metadata[cruise_id]
                    port_date = meta['dates'][0]
                except (TypeError, KeyError):
                    port_date = None
                if not port_date:
                    port_date = min(df['_DATETIME'])
                df.globals['EXPOCODE'] = create_expocode(
                    ship_code(ship, raise_on_unknown=False), port_date)
                log.info(df.globals['EXPOCODE'])
                df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
                collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
                # Normalize all the parameter column lengths. There may be
                # columns that did not get data written to them, so make
                # sure they are just as long as the rest.
                length = len(df)
                for c in df.columns.values():
                    c.set_length(length)
                try:
                    dfc = self[df.globals['_OS_ID']]
                except KeyError:
                    dfc = self[df.globals['_OS_ID']] = DataFileCollection()
                dfc.files.append(df)
                dfi = 0

            # Create a new cast.
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        dt_deci = bats_time_to_dt(parts[2])
        #if dt_ascii != dt_deci:
        #    log.warn(
        #        u'Dates differ on data row {0}: {5} {1!r}={2} '
        #        '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii,
        #                           parts[2], dt_deci, dt_deci - dt_ascii))
        df['_DATETIME'].set(dfi, dt_ascii)
        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        parts_auto = parts[s:]
        for p, v in zip(params_auto, parts_auto):
            param = p[0]
            try:
                param = bats_to_param[param]
            except KeyError:
                pass
            if cruise_num < 121 and param == 'TON':
                param = 'DON'
            if (equal_with_epsilon(v, -9) or
                    equal_with_epsilon(v, -9.9) or
                    equal_with_epsilon(v, -9.99)):
                df[param].set_check_range(dfi, None)
            # TODO determine whether -10 is just bad formatting for -9.9
            elif equal_with_epsilon(v, -10):
                #log.warn(u'Possible missing data value {0}'.format(v))
                df[param].set_check_range(dfi, None)
            elif v == 0:
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))
        dfi += 1

        # Since this is a super long file that contains multiple cruises
        # and casts, as the file is processed it is split apart into a list
        # of DataFileCollections containing DataFile objects for each cast.
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))
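
# A standalone sketch (hypothetical, not the module's API) of the BATS
# missing-value convention handled above: -9, -9.9, -9.99, and possibly
# -10 all mark missing data, while an exact 0 marks a value under the
# detection limit.
def _bats_missing_sketch(value, epsilon=1e-6):
    """Return True if a BATS bottle value is a missing-data sentinel."""
    return any(abs(float(value) - fill) < epsilon
               for fill in (-9.0, -9.9, -9.99, -10.0))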