def main():
    parser = argparse.ArgumentParser(description='GNSS time series stacker')

    parser.add_argument('project_file', type=str, nargs=1, metavar='{project cfg file}',
                        help="Project CFG file with all the stations being processed in Parallel.GAMIT")

    args = parser.parse_args()

    cnn = dbConnection.Cnn("gnss_data.cfg")

    GamitConfig = GamitConfiguration(args.project_file[0], check_config=False)  # type: GamitConfiguration

    stations = station_list(cnn, GamitConfig.NetworkConfig,
                            [Date(year=1999, doy=100), Date(year=1999, doy=128)])

    # split the stations into subnet_count subnetworks
    archive = pyArchiveStruct.RinexStruct(cnn)  # type: pyArchiveStruct.RinexStruct

    net_object = pyNetwork.Network(cnn, archive, GamitConfig, stations, Date(year=1999, doy=128))

    generate_kml([Date(year=1999, doy=128)], net_object.sessions, GamitConfig)
def DeleteRinex(cnn, stnlist, start_date, end_date, completion_limit=0.0):
    Archive = pyArchiveStruct.RinexStruct(cnn)

    for stn in stnlist:
        NetworkCode = stn['NetworkCode']
        StationCode = stn['StationCode']

        rs = cnn.query('SELECT * FROM rinex WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                       'AND "ObservationFYear" BETWEEN %f AND %f AND "Completion" <= %f'
                       % (NetworkCode, StationCode, start_date.fyear, end_date.fyear, completion_limit))

        rinex = rs.dictresult()

        tqdm.write(' >> Deleting %i RINEX files and solutions for %s.%s between %s - %s and completion <= %.3f'
                   % (len(rinex), NetworkCode, StationCode, start_date.yyyyddd(), end_date.yyyyddd(),
                      completion_limit))

        for rnx in tqdm(rinex):
            try:
                # delete rinex file (also deletes PPP and GAMIT solutions)
                Archive.remove_rinex(rnx)
            except dbConnection.dbErrDelete as e:
                tqdm.write('Failed to delete solutions and/or RINEX files for %i %i. Reason: %s\n'
                           % (rnx['Year'], rnx['DOY'], str(e)))
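
# The queries above interpolate values into the SQL with the % operator. If the underlying
# driver is psycopg2 (an assumption: dbConnection is not shown in this section), driver-side
# parameter binding is a safer pattern. A minimal sketch against a raw psycopg2 connection:
import psycopg2

def delete_candidates(conn, network, station, start_fyear, end_fyear, completion_limit=0.0):
    # psycopg2 escapes each bound value, so quotes in inputs cannot break the statement
    with conn.cursor() as cur:
        cur.execute('SELECT * FROM rinex WHERE "NetworkCode" = %s AND "StationCode" = %s '
                    'AND "ObservationFYear" BETWEEN %s AND %s AND "Completion" <= %s',
                    (network, station, start_fyear, end_fyear, completion_limit))
        return cur.fetchall()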
def check_rinex_stn(NetworkCode, StationCode, start_date, end_date):
    # load the connection
    try:
        # try to open a connection to the database
        cnn = dbConnection.Cnn("gnss_data.cfg")
        Config = pyOptions.ReadOptions("gnss_data.cfg")
    except Exception:
        return traceback.format_exc() + ' processing: (' + NetworkCode + '.' + StationCode \
               + ') using node ' + platform.node(), None

    try:
        Archive = pyArchiveStruct.RinexStruct(cnn)

        rs = cnn.query('SELECT * FROM rinex WHERE "NetworkCode" = \'%s\' AND '
                       '"StationCode" = \'%s\' AND '
                       '"ObservationSTime" BETWEEN \'%s\' AND \'%s\' '
                       'ORDER BY "ObservationSTime"'
                       % (NetworkCode, StationCode, start_date.yyyymmdd(), end_date.yyyymmdd()))

        rnxtbl = rs.dictresult()
        missing_files = []

        for rnx in rnxtbl:
            crinex_path = os.path.join(Config.archive_path,
                                       Archive.build_rinex_path(NetworkCode, StationCode,
                                                                rnx['ObservationYear'],
                                                                rnx['ObservationDOY'],
                                                                filename=rnx['Filename']))

            if not os.path.exists(crinex_path):
                # problem with the file! it does not appear to be in the archive
                Archive.remove_rinex(rnx)

                event = pyEvents.Event(
                    Description='A missing RINEX file was found during RINEX integrity check: ' + crinex_path +
                                '. It has been removed from the database. Consider rerunning PPP for this station.',
                    NetworkCode=NetworkCode,
                    StationCode=StationCode,
                    Year=rnx['ObservationYear'],
                    DOY=rnx['ObservationDOY'])

                cnn.insert_event(event)

                missing_files += [crinex_path]

        return None, missing_files

    except Exception:
        return traceback.format_exc() + ' processing: ' + NetworkCode + '.' + \
               StationCode + ' using node ' + platform.node(), None
def UpdateRecord(rinex, path):
    cnn = dbConnection.Cnn('gnss_data.cfg')
    Config = pyOptions.ReadOptions('gnss_data.cfg')

    try:
        rnxobj = pyRinex.ReadRinex(rinex['NetworkCode'], rinex['StationCode'], path)

        date = pyDate.Date(year=rinex['ObservationYear'], doy=rinex['ObservationDOY'])

        if not verify_rinex_date_multiday(date, rnxobj, Config):
            cnn.begin_transac()
            # propagate the deletes
            cnn.query('DELETE FROM gamit_soln WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                      'AND "Year" = %i AND "DOY" = %i'
                      % (rinex['NetworkCode'], rinex['StationCode'],
                         rinex['ObservationYear'], rinex['ObservationDOY']))
            cnn.query('DELETE FROM ppp_soln WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                      'AND "Year" = %i AND "DOY" = %i'
                      % (rinex['NetworkCode'], rinex['StationCode'],
                         rinex['ObservationYear'], rinex['ObservationDOY']))
            cnn.query('DELETE FROM rinex WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                      'AND "ObservationYear" = %i AND "ObservationDOY" = %i'
                      % (rinex['NetworkCode'], rinex['StationCode'],
                         rinex['ObservationYear'], rinex['ObservationDOY']))
            cnn.commit_transac()

            return 'Multiday rinex file moved out of the archive: ' + rinex['NetworkCode'] + '.' + \
                   rinex['StationCode'] + ' ' + str(rinex['ObservationYear']) + ' ' + \
                   str(rinex['ObservationDOY']) + ' using node ' + platform.node()
        else:
            cnn.update('rinex', rinex, Completion=rnxobj.completion)

    except pyRinex.pyRinexExceptionBadFile:
        # empty file or problem with the crinex format: move it out of the archive
        archive = pyArchiveStruct.RinexStruct(cnn)
        archive.remove_rinex(rinex,
                             os.path.join(Config.repository_data_reject,
                                          'bad_rinex/%i/%03i' % (rinex['ObservationYear'],
                                                                 rinex['ObservationDOY'])))
    except Exception:
        return traceback.format_exc() + ' processing rinex: ' + rinex['NetworkCode'] + '.' + \
               rinex['StationCode'] + ' ' + str(rinex['ObservationYear']) + ' ' + \
               str(rinex['ObservationDOY']) + ' using node ' + platform.node()
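
# verify_rinex_date_multiday() is called above but not defined in this section. Based on how
# it is used here (and how multiday files are handled in rinex_task below), a minimal sketch
# of the contract it appears to implement; this body is an illustration, not the project's
# actual implementation:
def verify_rinex_date_multiday_sketch(date, rnxobj, Config):
    # True when the RINEX is a normal single-day file matching `date`;
    # False when the file spans multiple days, in which case the caller purges the record
    if rnxobj.multiday:
        return False
    return rnxobj.date == date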
def main():
    Config = pyOptions.ReadOptions('gnss_data.cfg')
    JobServer = pyJobServer.JobServer(Config)  # type: pyJobServer.JobServer

    cnn = dbConnection.Cnn('gnss_data.cfg')
    archive = pyArchiveStruct.RinexStruct(cnn)

    rinex = cnn.query_float('SELECT * FROM rinex WHERE "ObservationYear" <= 1995 ORDER BY "NetworkCode", '
                            '"StationCode", "ObservationYear", "ObservationDOY"', as_dict=True)

    pbar = tqdm(desc='%-30s' % ' >> Processing rinex files', total=len(rinex), ncols=160)

    modules = ('os', 'pyRinex')
    callback = []

    for rnx in rinex:
        filename = archive.build_rinex_path(rnx['NetworkCode'], rnx['StationCode'],
                                            rnx['ObservationYear'], rnx['ObservationDOY'],
                                            filename=rnx['Filename'])

        arguments = (rnx['NetworkCode'], rnx['StationCode'], Config.archive_path, filename)

        JobServer.SubmitJob(check_rinex, arguments, (), modules, callback, callback_class(pbar), 'callbackfunc')

        if JobServer.process_callback:
            # handle any output messages during this batch
            callback = output_handle(callback)
            JobServer.process_callback = False

    tqdm.write(' >> waiting for jobs to finish...')
    JobServer.job_server.wait()
    tqdm.write(' >> Done.')

    # process the errors and the new stations
    output_handle(callback)

    pbar.close()
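
# The worker check_rinex() submitted above is not shown in this section. Given the arguments
# it receives and the modules list ('os', 'pyRinex'), a plausible minimal sketch of such a
# worker follows; this is purely an illustration of the pattern, the real worker may differ:
def check_rinex_sketch(NetworkCode, StationCode, archive_path, filename):
    import os
    import pyRinex

    path = os.path.join(archive_path, filename)
    if not os.path.exists(path):
        return 'Missing file: %s' % path
    # opening the file recomputes the Completion value that this script back-fills
    with pyRinex.ReadRinex(NetworkCode, StationCode, path) as rnx:
        return '%s.%s completion = %.3f' % (NetworkCode, StationCode, rnx.completion)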
def rinex_task(NetworkCode, StationCode, date, ObservationFYear, metafile):

    from pyRunWithRetry import RunCommandWithRetryExeception

    etm_err = ''

    # local directory as destination for the CRINEZ files
    pwd_rinex = '/media/leleiona/aws-files/' + date.yyyy() + '/' + date.ddd()

    stop_no_aprs = False

    Config = pyOptions.ReadOptions("gnss_data.cfg")  # type: pyOptions.ReadOptions
    cnn = dbConnection.Cnn('gnss_data.cfg')

    # create Archive object
    Archive = pyArchiveStruct.RinexStruct(cnn)  # type: pyArchiveStruct.RinexStruct
    ArchiveFile = Archive.build_rinex_path(NetworkCode, StationCode, date.year, date.doy)
    ArchiveFile = os.path.join(Config.archive_path, ArchiveFile)

    # check for a station alias in the alias table
    alias = cnn.query('SELECT * FROM stationalias WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\''
                      % (NetworkCode, StationCode))

    sa = alias.dictresult()
    if len(sa) > 0:
        StationAlias = sa[0]['StationAlias']
    else:
        StationAlias = StationCode

    # create the crinez filename
    filename = StationAlias + date.ddd() + '0.' + date.yyyy()[2:4] + 'd.Z'

    try:
        # create the ETM object
        etm = pyETM.PPPETM(cnn, NetworkCode, StationCode)
        # get APRs and sigmas (only in NEU)
        Apr, sigmas, Window, source = etm.get_xyz_s(date.year, date.doy)
        del etm
    except pyETM.pyETMException as e:
        # no PPP solutions available! MUST have APRs from the last run, try that
        stop_no_aprs = True
        Window = None
        source = ''
        etm_err = str(e)
    except Exception:
        return (None, None,
                traceback.format_exc() + ' processing ' + NetworkCode + '.' + StationCode +
                ' using node ' + platform.node() + '\n', metafile)

    # find this station-day in the latest global run APRs
    apr_tbl = cnn.query('SELECT * FROM apr_coords WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                        'AND "Year" = %i AND "DOY" = %i'
                        % (NetworkCode, StationCode, date.year, date.doy))
    apr = apr_tbl.dictresult()

    if len(apr) > 0:
        # APRs exist for this station-day
        # replace the PPP ETM values with Mike's APRs
        Apr = numpy.array(([float(apr[0]['x'])], [float(apr[0]['y'])], [float(apr[0]['z'])]))
        sigmas = numpy.array(([float(apr[0]['sn'])], [float(apr[0]['se'])], [float(apr[0]['su'])]))
        source = apr[0]['ReferenceFrame'] + ' APRs'

    elif len(apr) == 0 and stop_no_aprs:
        return (None, None,
                '%s.%s has no PPP solutions and no APRs from last global run for %s! '
                'Specific error from pyETM.PPPETM (if available) was: %s'
                % (NetworkCode, StationCode, date.yyyyddd(), etm_err), metafile)

    # convert sigmas to XYZ
    stn = cnn.query('SELECT * FROM stations WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\''
                    % (NetworkCode, StationCode))
    stn = stn.dictresult()
    sigmas_xyz = sigmas_neu2xyz(stn[0]['lat'], stn[0]['lon'], sigmas)

    # write the station.info
    # if no station info comes back for this date, print a message and continue with the next station
    try:
        # use the argument 'ObservationFYear' to get the exact RINEX session fyear without opening the file
        rnx_date = pyDate.Date(fyear=float(ObservationFYear))
        stninfo = pyStationInfo.StationInfo(cnn, NetworkCode, StationCode, rnx_date, h_tolerance=12)
    except pyStationInfo.pyStationInfoException:
        # if no metadata, warn the user and continue
        return (None, None,
                '%s.%s has no metadata available for this date, but a RINEX exists!'
                % (NetworkCode, StationCode), metafile)

    # check if the RINEX file needs to be synced or not
    aws_sync = cnn.query('SELECT * FROM aws_sync WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' '
                         'AND "Year" = %i AND "DOY" = %i'
                         % (NetworkCode, StationCode, date.year, date.doy)).dictresult()

    if len(aws_sync) == 0:
        # only copy the RINEX if not synced!
        # open the RINEX file in the Archive
        try:
            with pyRinex.ReadRinex(NetworkCode, StationCode, ArchiveFile,
                                   False) as Rinex:  # type: pyRinex.ReadRinex
                Rnx = None

                if Rinex.multiday:
                    # find the rinex that corresponds to the session being processed, if multiday
                    for rinex in Rinex.multiday_rnx_list:
                        if rinex.date == date:
                            Rnx = rinex
                            break

                    if Rnx is None:
                        return (None, None,
                                '%s.%s was a multiday file and date %8.3f could not be found!'
                                % (NetworkCode, StationCode, date.fyear), metafile)
                else:
                    # if Rinex is not multiday
                    Rnx = Rinex

                Rnx.purge_comments()
                Rnx.normalize_header(stninfo)
                Rnx.rename(filename)

                if Window is not None:
                    window_rinex(Rnx, Window)
                    # NOTE: '%Y-%M-%d' in the original was a bug (%M is minutes); use %m for month
                    source += ' windowed from/to ' + Window.datetime().strftime('%Y-%m-%d %H:%M:%S')

                # before creating the local copy, decimate the file
                Rnx.decimate(30)
                Rnx.compress_local_copyto(pwd_rinex)

        except (pyRinex.pyRinexException, RunCommandWithRetryExeception):
            # new behavior: if an error occurs while generating the RINEX, copy the raw file from the archive
            try:
                shutil.copy(ArchiveFile, os.path.join(pwd_rinex, filename))
            except Exception:
                return (None, None,
                        traceback.format_exc() + ' processing ' + NetworkCode + '.' + StationCode +
                        ' using node ' + platform.node() + '\n', metafile)
        except Exception:
            return (None, None,
                    traceback.format_exc() + ' processing ' + NetworkCode + '.' + StationCode +
                    ' using node ' + platform.node() + '\n', metafile)

    # everything ok, return the information
    APR = '%s.%s %s %12.3f %12.3f %12.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %s' % (
        NetworkCode, StationCode, StationAlias, Apr[0, 0], Apr[1, 0], Apr[2, 0],
        sigmas_xyz[0, 0], sigmas_xyz[1, 0], sigmas_xyz[2, 0],
        sigmas[1, 0], sigmas[0, 0], sigmas[2, 0], source.replace(' ', '_'))

    return APR, stninfo.return_stninfo().replace(StationCode.upper(), StationAlias.upper()), None, metafile
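
# sigmas_neu2xyz() is called in rinex_task() but not defined in this section. Propagating
# NEU sigmas into XYZ is commonly done by rotating the (diagonal) NEU covariance into ECEF
# with the local-to-ECEF rotation matrix. A minimal sketch of that standard approach — an
# assumption about what the project's helper does, with lat/lon in degrees:
import numpy

def sigmas_neu2xyz_sketch(lat, lon, sigmas_neu):
    # sigmas_neu is a 3x1 array ordered (n, e, u), matching the usage in rinex_task()
    lat, lon = numpy.radians(lat), numpy.radians(lon)
    # columns of R map the local (n, e, u) axes into ECEF (x, y, z)
    R = numpy.array([[-numpy.sin(lat) * numpy.cos(lon), -numpy.sin(lon), numpy.cos(lat) * numpy.cos(lon)],
                     [-numpy.sin(lat) * numpy.sin(lon),  numpy.cos(lon), numpy.cos(lat) * numpy.sin(lon)],
                     [ numpy.cos(lat),                   0.0,            numpy.sin(lat)]])
    cov_neu = numpy.diagflat(numpy.square(sigmas_neu))
    cov_xyz = R.dot(cov_neu).dot(R.T)
    # back to 3x1 so callers can index [i, 0] as rinex_task() does
    return numpy.sqrt(numpy.diagonal(cov_xyz)).reshape(3, 1)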
def download_data(cnn, Config, stnlist, drange):

    archive = pyArchiveStruct.RinexStruct(cnn)

    pbar = tqdm(desc='%-30s' % ' >> Downloading stations declared in data_source',
                total=len(drange) * len(stnlist), ncols=160)

    for date in [pyDate.Date(mjd=mjd) for mjd in drange]:
        for stn in stnlist:
            StationCode = stn['StationCode']
            NetworkCode = stn['NetworkCode']

            pbar.set_postfix(current='%s.%s %s' % (NetworkCode, StationCode, date.yyyyddd()))
            pbar.update()

            try:
                _ = pyStationInfo.StationInfo(cnn, NetworkCode, StationCode, date=date)
            except pyStationInfo.pyStationInfoHeightCodeNotFound:
                # if the error is that no height code is found, then there is a record
                pass
            except pyStationInfo.pyStationInfoException:
                # no possible data here, inform and skip
                tqdm.write(' >> %s.%s skipped: no station information available -> assume station is inactive'
                           % (NetworkCode, StationCode))
                continue

            rinex = archive.get_rinex_record(NetworkCode=NetworkCode, StationCode=StationCode,
                                             ObservationYear=date.year, ObservationDOY=date.doy)

            if not rinex:
                download = True
            elif rinex and rinex[0]['Completion'] < 0.5:
                download = True
            else:
                download = False

            if download:
                rs = cnn.query('SELECT * FROM data_source WHERE "NetworkCode" = \'%s\' '
                               'AND "StationCode" = \'%s\' ORDER BY try_order'
                               % (NetworkCode, StationCode))
                sources = rs.dictresult()

                for source in sources:
                    tqdm.write(' >> Need to download %s.%s %s'
                               % (NetworkCode, StationCode, date.yyyyddd()))

                    result = False
                    folder = os.path.dirname(replace_vars(source['path'], date, StationCode))
                    destiny = os.path.join(Config.repository_data_in, source['fqdn'].replace(':', '_'))
                    filename = os.path.basename(replace_vars(source['path'], date, StationCode))

                    if source['protocol'].lower() == 'ftp':
                        result = download_ftp(source['fqdn'], source['username'], source['password'],
                                              folder, destiny, filename)
                    elif source['protocol'].lower() == 'sftp':
                        result = download_sftp(source['fqdn'], source['username'], source['password'],
                                               folder, destiny, filename)
                    elif source['protocol'].lower() == 'http':
                        result = download_http(source['fqdn'], folder, destiny, filename)
                    else:
                        tqdm.write(' -- Unknown protocol %s for %s.%s'
                                   % (source['protocol'].lower(), NetworkCode, StationCode))

                    if result:
                        tqdm.write(' -- Successful download of %s.%s %s'
                                   % (NetworkCode, StationCode, date.yyyyddd()))

                        # success downloading the file
                        if source['format']:
                            tqdm.write(' -- File requires postprocess using scheme %s' % (source['format']))
                            process_file(os.path.join(destiny, filename), filename, destiny,
                                         source['format'], StationCode, date)
                        break
                    else:
                        tqdm.write(' -- Could not download %s.%s %s -> trying next source'
                                   % (NetworkCode, StationCode, date.yyyyddd()))
            else:
                tqdm.write(' >> File for %s.%s %s already in db'
                           % (NetworkCode, StationCode, date.yyyyddd()))

    pbar.close()
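
# replace_vars() is called above but not defined in this section. It evidently expands date
# and station placeholders inside the data_source path template. A minimal sketch under an
# assumed token scheme (the real token names are not shown anywhere in this code):
def replace_vars_sketch(path, date, StationCode):
    # hypothetical tokens: ${year}, ${doy}, ${station}
    return (path.replace('${year}', date.yyyy())
                .replace('${doy}', date.ddd())
                .replace('${station}', StationCode))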
def RenameStation(cnn, NetworkCode, StationCode, DestNetworkCode, DestStationCode,
                  start_date, end_date, archive_path):

    # make sure the destination station exists
    try:
        rs = cnn.query('SELECT * FROM stations WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\''
                       % (DestNetworkCode, DestStationCode))

        if rs.ntuples() == 0:
            # ask the user to create it first
            print('The requested destination station does not exist. Please create it and try again')
        else:
            # select the original rinex file names
            # this is the set that will effectively be transferred to the dest net and stn codes
            # I select this portion of data here and not after we rename the station to prevent picking up
            # more data (due to the time window) that is ALREADY in the dest station.
            rs = cnn.query('SELECT * FROM rinex WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' AND '
                           '"ObservationSTime" BETWEEN \'%s\' AND \'%s\''
                           % (NetworkCode, StationCode, start_date.yyyymmdd(), end_date.yyyymmdd()))

            original_rs = rs.dictresult()

            print(" >> Beginning transfer of %i rinex files from %s.%s to %s.%s"
                  % (len(original_rs), NetworkCode, StationCode, DestNetworkCode, DestStationCode))

            for src_rinex in tqdm(original_rs):
                # rename the files
                Archive = pyArchiveStruct.RinexStruct(cnn)  # type: pyArchiveStruct.RinexStruct

                src_file_path = Archive.build_rinex_path(NetworkCode, StationCode,
                                                         src_rinex['ObservationYear'],
                                                         src_rinex['ObservationDOY'],
                                                         filename=src_rinex['Filename'])

                src_path = os.path.split(os.path.join(archive_path, src_file_path))[0]
                src_file = os.path.split(os.path.join(archive_path, src_file_path))[1]

                dest_file = src_file.replace(StationCode, DestStationCode)

                cnn.begin_transac()
                # update the NetworkCode, StationCode and filename information in the db
                cnn.query('UPDATE rinex SET "NetworkCode" = \'%s\', "StationCode" = \'%s\', "Filename" = \'%s\' '
                          'WHERE "NetworkCode" = \'%s\' AND "StationCode" = \'%s\' AND "ObservationYear" = %i AND '
                          '"ObservationDOY" = %i AND "Filename" = \'%s\''
                          % (DestNetworkCode, DestStationCode, dest_file.replace('d.Z', 'o'),
                             NetworkCode, StationCode, src_rinex['ObservationYear'],
                             src_rinex['ObservationDOY'], src_rinex['Filename']))

                # DO NOT USE pyArchiveStruct because we have an active transaction and the change
                # is not visible yet. Because we don't know anything about the archive's structure,
                # we just try to replace the names and that should suffice
                dest_path = src_path.replace(StationCode, DestStationCode).replace(NetworkCode, DestNetworkCode)

                # check that the destination path exists (it should, but...)
                if not os.path.isdir(dest_path):
                    os.makedirs(dest_path)

                shutil.move(os.path.join(src_path, src_file), os.path.join(dest_path, dest_file))

                # if we are here, we are good. Commit
                cnn.commit_transac()

                date = pyDate.Date(year=src_rinex['ObservationYear'], doy=src_rinex['ObservationDOY'])

                # station info transfer
                try:
                    stninfo_dest = pyStationInfo.StationInfo(cnn, DestNetworkCode, DestStationCode,
                                                             date)  # type: pyStationInfo.StationInfo
                    # no error, nothing to do
                except pyStationInfo.pyStationInfoException:
                    # failed to get a valid station info record!
                    # we need to incorporate the station info record from the source station
                    try:
                        stninfo_dest = pyStationInfo.StationInfo(cnn, DestNetworkCode, DestStationCode)
                        stninfo_src = pyStationInfo.StationInfo(cnn, NetworkCode, StationCode, date)

                        # force the station code in the record to be the same as DestStationCode
                        record = stninfo_src.currentrecord
                        record['StationCode'] = DestStationCode

                        stninfo_dest.InsertStationInfo(record)
                    except pyStationInfo.pyStationInfoException as e:
                        # if there is no station info for this station either, warn the user!
                        tqdm.write(' -- Error while updating Station Information! %s' % (str(e)))

    except Exception:
        cnn.rollback_transac()
        raise
def CheckRinexIntegrity(cnn, Config, stnlist, start_date, end_date, operation, JobServer):

    global differences

    tqdm.write(' >> Archive path: %s' % Config.archive_path)

    if operation == 'fix':
        modules = ('os', 'pyArchiveStruct', 'dbConnection', 'pyOptions', 'traceback', 'platform', 'pyEvents')
        pbar = tqdm(total=len(stnlist), ncols=80)
        differences = []

        JobServer.create_cluster(check_rinex_stn, callback=stnrnx_callback, progress_bar=pbar, modules=modules)

        for stn in stnlist:
            StationCode = stn['StationCode']
            NetworkCode = stn['NetworkCode']
            JobServer.submit(NetworkCode, StationCode, start_date, end_date)

        JobServer.wait()
        pbar.close()

        for rinex in differences:
            if rinex[1]:
                for diff in rinex[1]:
                    print('File ' + diff + ' was not found in the archive. See events for details.')
            elif rinex[0]:
                print('Error encountered: ' + rinex[0])

    elif operation == 'report':
        Archive = pyArchiveStruct.RinexStruct(cnn)

        for stn in tqdm(stnlist, ncols=160, desc=' >> Checking archive integrity', disable=None, position=0):
            StationCode = stn['StationCode']
            NetworkCode = stn['NetworkCode']

            rs = cnn.query('SELECT * FROM rinex WHERE "NetworkCode" = \'%s\' AND '
                           '"StationCode" = \'%s\' AND '
                           '"ObservationSTime" BETWEEN \'%s\' AND \'%s\' '
                           'ORDER BY "ObservationSTime"'
                           % (NetworkCode, StationCode, start_date.yyyymmdd(), end_date.yyyymmdd()))

            rnxtbl = rs.dictresult()

            pbar = tqdm(total=len(rnxtbl), ncols=160, desc=' >> Checking archive integrity',
                        disable=None, position=1)

            for rnx in rnxtbl:
                archive_path = Archive.build_rinex_path(NetworkCode, StationCode,
                                                        rnx['ObservationYear'], rnx['ObservationDOY'],
                                                        filename=rnx['Filename'])
                crinex_path = os.path.join(Config.archive_path, archive_path)

                pbar.postfix = '%s.%s: %s' % (NetworkCode, StationCode, archive_path)
                pbar.update()

                if not os.path.exists(crinex_path):
                    tqdm.write(' -- CRINEZ file %s exists in the database but was not found in the archive'
                               % crinex_path)

            pbar.close()
# function to print any errors that are encountered during parallel execution
# (the def line was missing from this fragment; reconstructed from the call sites above)
def output_handle(messages):
    for msg in messages:
        if msg:
            file_append('errors_amend.log',
                        'ON ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') +
                        ' an unhandled error occurred:\n' + msg + '\n' +
                        'END OF ERROR =================== \n\n')
    return []


cnn = dbConnection.Cnn('gnss_data.cfg')
options = pyOptions.ReadOptions('gnss_data.cfg')
JobServer = pyJobServer.JobServer(options)
archive = pyArchiveStruct.RinexStruct(cnn)

for table in ['rinex']:
    print(" >> Processing " + table)

    tbl = cnn.query('SELECT * FROM ' + table + ' WHERE "Completion" is null')
    rnx = tbl.dictresult()

    callback = []
    pbar = tqdm(total=len(rnx), ncols=80)

    depfuncs = (verify_rinex_date_multiday,)
    modules = ('pyRinex', 'dbConnection', 'traceback', 'platform', 'pyDate', 'pyOptions', 'pyArchiveStruct')
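
# file_append() is called in output_handle() but not defined in this section. A minimal
# sketch of the obvious implementation (an assumption; the project's helper may differ):
def file_append_sketch(filename, text):
    # open in append mode so successive batches keep adding to the same log
    with open(filename, 'a') as f:
        f.write(text)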
def main():

    # put the connection and config in global variables to use inside callback_handle
    global cnn
    global repository_data_in

    # bind to the repository directory
    parser = argparse.ArgumentParser(description='Archive operations Main Program')

    parser.add_argument('-purge', '--purge_locks', action='store_true',
                        help="Delete any network starting with '?' from the stations table and purge the "
                             "contents of the locks table, deleting the associated files from data_in.")

    parser.add_argument('-np', '--noparallel', action='store_true',
                        help="Execute command without parallelization.")

    args = parser.parse_args()

    Config = pyOptions.ReadOptions('gnss_data.cfg')
    repository_data_in = Config.repository_data_in

    if not os.path.isdir(Config.repository):
        print("the provided repository path in gnss_data.cfg is not a folder")
        exit()

    JobServer = pyJobServer.JobServer(
        Config, run_parallel=not args.noparallel,
        software_sync=[Config.options['ppp_remote_local']])  # type: pyJobServer.JobServer

    cnn = dbConnection.Cnn('gnss_data.cfg')

    # create the execution log
    cnn.insert('executions', script='ArchiveService.py')

    # set the data_xx directories
    data_in = os.path.join(Config.repository, 'data_in')
    data_in_retry = os.path.join(Config.repository, 'data_in_retry')
    data_reject = os.path.join(Config.repository, 'data_rejected')

    # check if the subdirs exist
    if not os.path.isdir(data_in):
        os.makedirs(data_in)

    if not os.path.isdir(data_in_retry):
        os.makedirs(data_in_retry)

    if not os.path.isdir(data_reject):
        os.makedirs(data_reject)

    # delete any locks with a NetworkCode != '?%'
    cnn.query('delete from locks where "NetworkCode" not like \'?%\'')

    # get the locks to avoid reprocessing files that had no metadata in the database
    locks = cnn.query('SELECT * FROM locks')
    locks = locks.dictresult()

    if args.purge_locks:
        # first, delete all associated files
        for lock in tqdm(locks, ncols=160, unit='crz', desc='%-30s' % ' >> Purging locks', disable=None):
            try:
                os.remove(os.path.join(Config.repository_data_in, lock['filename']))
            except Exception:
                sys.exc_clear()

        # purge the contents of stations. This will automatically purge the locks table
        cnn.query('delete from stations where "NetworkCode" like \'?%\'')

        # purge the networks
        cnn.query('delete from networks where "NetworkCode" like \'?%\'')

        # purge the locks already taken care of (just in case)
        cnn.query('delete from locks where "NetworkCode" not like \'?%\'')

        # get the locks to avoid reprocessing files that had no metadata in the database
        locks = cnn.query('SELECT * FROM locks')
        locks = locks.dictresult()

    # look for data in data_in_retry and move it to data_in
    archive = pyArchiveStruct.RinexStruct(cnn)

    pbar = tqdm(desc='%-30s' % ' >> Scanning data_in_retry', ncols=160, unit='crz', disable=None)
    rfiles, paths, _ = archive.scan_archive_struct(data_in_retry, pbar)
    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Moving files to data_in', total=len(rfiles), ncols=160, unit='crz',
                disable=None)

    for rfile, path in zip(rfiles, paths):
        dest_file = os.path.join(data_in, rfile)

        # move the file into the folder
        Utils.move(path, dest_file)

        pbar.set_postfix(crinez=rfile)
        pbar.update()

        # remove the folder from data_in_retry (also removes the log file)
        try:
            # remove the log file that accompanies this Z file
            os.remove(path.replace('d.Z', '.log'))
        except Exception:
            sys.exc_clear()

    pbar.close()
    tqdm.write(' -- Cleaning data_in_retry')
    remove_empty_folders(data_in_retry)

    # take a break to allow the FS to finish the task
    time.sleep(5)

    files_path = []
    files_list = []

    pbar = tqdm(desc='%-30s' % ' >> Repository crinez scan', ncols=160, disable=None)
    rpaths, _, files = archive.scan_archive_struct(data_in, pbar)
    pbar.close()

    pbar = tqdm(desc='%-30s' % ' -- Checking the locks table', total=len(files), ncols=130, unit='crz',
                disable=None)

    for file, path in zip(files, rpaths):
        pbar.set_postfix(crinez=file)
        pbar.update()
        if path not in [lock['filename'] for lock in locks]:
            files_path.append(path)
            files_list.append(file)

    pbar.close()

    tqdm.write(" -- Found %i files in the lock list..." % (len(locks)))
    tqdm.write(" -- Found %i files (matching format [stnm][doy][s].[yy]d.Z) to process..." % (len(files_list)))

    pbar = tqdm(desc='%-30s' % ' >> Processing repository', total=len(files_path), ncols=160, unit='crz',
                disable=None)

    # dependency functions
    depfuncs = (check_rinex_timespan_int, write_error, error_handle, insert_data, verify_rinex_multiday)

    # import modules
    modules = ('pyRinex', 'pyArchiveStruct', 'pyOTL', 'pyPPP', 'pyStationInfo', 'dbConnection', 'Utils', 'os',
               'uuid', 'datetime', 'pyDate', 'numpy', 'traceback', 'platform', 'pyBrdc', 'pyProducts',
               'pyOptions', 'pyEvents')

    JobServer.create_cluster(process_crinex_file, depfuncs, callback_handle, pbar, modules=modules)

    for file_to_process, sfile in zip(files_path, files_list):
        JobServer.submit(file_to_process, sfile, data_reject, data_in_retry)

    JobServer.wait()

    pbar.close()

    JobServer.close_cluster()

    print_archive_service_summary()

    # iterate to delete empty folders
    remove_empty_folders(data_in)
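
# remove_empty_folders() is called above but not defined in this section. A minimal sketch
# of the usual bottom-up walk (an assumption; the project's helper may differ):
import os

def remove_empty_folders_sketch(root):
    # walk bottom-up so children are removed before their parents are tested
    for path, _, _ in os.walk(root, topdown=False):
        if path != root and not os.listdir(path):
            os.rmdir(path)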
def process_crinex_file(crinez, filename, data_rejected, data_retry):

    # create a uuid temporary folder in case we cannot read the year and doy from the file (and it gets rejected)
    reject_folder = os.path.join(data_rejected, str(uuid.uuid4()))

    try:
        cnn = dbConnection.Cnn("gnss_data.cfg")
        Config = pyOptions.ReadOptions("gnss_data.cfg")
        archive = pyArchiveStruct.RinexStruct(cnn)
        # apply the local configuration (path to the repo) in the executing node
        crinez = os.path.join(Config.repository_data_in, crinez)
    except Exception:
        return traceback.format_exc() + ' while opening the database to process file ' + \
               crinez + ' node ' + platform.node(), None

    # assume a default network code
    NetworkCode = 'rnx'

    # get the station code, year and doy from the filename
    fileparts = archive.parse_crinex_filename(filename)

    if fileparts:
        StationCode = fileparts[0].lower()
        doy = int(fileparts[1])
        year = int(Utils.get_norm_year_str(fileparts[3]))
    else:
        event = pyEvents.Event(
            Description='Could not read the station code, year or doy for file ' + crinez,
            EventType='error')
        error_handle(cnn, event, crinez, reject_folder, filename, no_db_log=True)
        return event['Description'], None

    # we can now make better reject and retry folders
    reject_folder = os.path.join(data_rejected,
                                 '%reason%/' + Utils.get_norm_year_str(year) + '/' +
                                 Utils.get_norm_doy_str(doy))

    retry_folder = os.path.join(data_retry,
                                '%reason%/' + Utils.get_norm_year_str(year) + '/' +
                                Utils.get_norm_doy_str(doy))

    try:
        # main try-except block
        with pyRinex.ReadRinex(NetworkCode, StationCode, crinez) as rinexinfo:  # type: pyRinex.ReadRinex

            # STOP! see if rinexinfo is a multiday rinex file
            if not verify_rinex_multiday(cnn, rinexinfo, Config):
                # it was a multiday rinex; verify_rinex_multiday took care of it
                return None, None

            # DDG: we don't use otl coefficients because we need an approximated coordinate
            # we therefore just calculate the first coordinate without otl
            # NOTICE that we have to trust the information coming in the RINEX header
            # (receiver type, antenna type, etc). We don't have station info data! Still, good enough
            # the final PPP coordinate will be calculated by pyScanArchive on a different process

            # make sure that the file has the appropriate coordinates in the header for PPP:
            # put the correct APR coordinates in the header.
            # if PPP doesn't work, sh_rx2apr is tried further down
            brdc = pyBrdc.GetBrdcOrbits(Config.brdc_path, rinexinfo.date, rinexinfo.rootdir)

            # inflate the chi**2 limit to make sure it will pass (even if we get a crappy coordinate)
            try:
                rinexinfo.auto_coord(brdc, chi_limit=1000)

                # normalize the header to add the APR coordinate
                # empty dict since there is nothing extra to change (other than the APR coordinate)
                rinexinfo.normalize_header(dict())
            except pyRinex.pyRinexExceptionNoAutoCoord:
                # could not determine an autonomous coordinate, try PPP anyways. 50% chance it will work
                pass

            with pyPPP.RunPPP(rinexinfo, '', Config.options, Config.sp3types, Config.sp3altrn,
                              rinexinfo.antOffset, strict=False, apply_met=False,
                              clock_interpolation=True) as ppp:  # type: pyPPP.RunPPP
                try:
                    ppp.exec_ppp()

                except pyPPP.pyRunPPPException as ePPP:

                    # inflate the chi**2 limit to make sure it will pass (even if we get a crappy coordinate)
                    # if the coordinate is TOO bad it will get kicked out by the unreasonable geodetic height
                    try:
                        auto_coords_xyz, auto_coords_lla = rinexinfo.auto_coord(brdc, chi_limit=1000)

                    except pyRinex.pyRinexExceptionNoAutoCoord as e:
                        # catch pyRinexExceptionNoAutoCoord and convert it into a pyRunPPPException
                        raise pyPPP.pyRunPPPException(
                            'Both PPP and sh_rx2apr failed to obtain a coordinate for %s.\n'
                            'The file has been moved into the rejection folder. '
                            'Summary PPP file and error (if exists) follows:\n%s\n\n'
                            'ERROR section:\n%s\npyRinex.auto_coord error follows:\n%s'
                            % (crinez.replace(Config.repository_data_in, ''), ppp.summary,
                               str(ePPP).strip(), str(e).strip()))

                    # DDG: this is correct - auto_coord returns a numpy array (calculated in ecef2lla),
                    # so ppp.lat = auto_coords_lla is consistent.
                    ppp.lat = auto_coords_lla[0]
                    ppp.lon = auto_coords_lla[1]
                    ppp.h = auto_coords_lla[2]
                    ppp.x = auto_coords_xyz[0]
                    ppp.y = auto_coords_xyz[1]
                    ppp.z = auto_coords_xyz[2]

                # check for unreasonable heights
                if ppp.h[0] > 9000 or ppp.h[0] < -400:
                    raise pyRinex.pyRinexException(
                        os.path.relpath(crinez, Config.repository_data_in) +
                        ' : unreasonable geodetic height (%.3f). '
                        'RINEX file will not enter the archive.' % (ppp.h[0]))

                Result, match, _ = ppp.verify_spatial_coherence(cnn, StationCode)

                if Result:
                    # insert: there is only 1 match with the same StationCode.
                    rinexinfo.rename(NetworkCode=match[0]['NetworkCode'])
                    insert_data(cnn, archive, rinexinfo)
                else:
                    if len(match) == 1:
                        error = "%s matches the coordinate of %s.%s (distance = %8.3f m) but the filename " \
                                "indicates it is %s. Please verify that this file belongs to %s.%s, rename " \
                                "it and try again. The file was moved to the retry folder. " \
                                "Rename script and pSQL sentence follows:\n" \
                                "BASH# mv %s %s\n" \
                                "PSQL# INSERT INTO stations (\"NetworkCode\", \"StationCode\", \"auto_x\", " \
                                "\"auto_y\", \"auto_z\", \"lat\", \"lon\", \"height\") VALUES " \
                                "('???','%s', %12.3f, %12.3f, %12.3f, " \
                                "%10.6f, %10.6f, %8.3f)\n" \
                                % (os.path.relpath(crinez, Config.repository_data_in),
                                   match[0]['NetworkCode'], match[0]['StationCode'],
                                   float(match[0]['distance']), StationCode,
                                   match[0]['NetworkCode'], match[0]['StationCode'],
                                   os.path.join(retry_folder, filename),
                                   os.path.join(retry_folder,
                                                filename.replace(StationCode, match[0]['StationCode'])),
                                   StationCode, ppp.x, ppp.y, ppp.z, ppp.lat[0], ppp.lon[0], ppp.h[0])

                        raise pyPPP.pyRunPPPExceptionCoordConflict(error)

                    elif len(match) > 1:
                        # a number of things could have happened:
                        # 1) wrong station code, and more than one matching station
                        #    (that do not match the station code, of course)
                        #    see rms.lhcl 2007 113 -> matches rms.igm0: 34.293 m, rms.igm1: 40.604 m,
                        #    rms.byns: 4.819 m
                        # 2) no entry in the database for this solution -> add a lock and populate the exit args

                        # no match, but we have some candidates
                        error = "Solution for RINEX in repository (%s %s) did not match a unique station " \
                                "location (and station code) within 5 km. Possible candidate(s): %s. This " \
                                "file has been moved to data_in_retry. pSQL sentence follows:\n" \
                                "PSQL# INSERT INTO stations (\"NetworkCode\", \"StationCode\", \"auto_x\", " \
                                "\"auto_y\", \"auto_z\", \"lat\", \"lon\", \"height\") VALUES " \
                                "('???','%s', %12.3f, %12.3f, %12.3f, %10.6f, %10.6f, %8.3f)\n" \
                                % (os.path.relpath(crinez, Config.repository_data_in),
                                   rinexinfo.date.yyyyddd(),
                                   ', '.join(['%s.%s: %.3f m' % (m['NetworkCode'], m['StationCode'],
                                                                 m['distance']) for m in match]),
                                   StationCode, ppp.x, ppp.y, ppp.z, ppp.lat[0], ppp.lon[0], ppp.h[0])

                        raise pyPPP.pyRunPPPExceptionCoordConflict(error)

                    else:
                        # only found a station removing the distance limit (could be thousands of km away!)
                        # The user will have to add the metadata to the database before the file can be added,
                        # but in principle no problem was detected by the process. This file will stay in this
                        # folder so that it gets analyzed again, but a "lock" will be added to the file that
                        # will have to be removed before the service analyzes it again.
                        # if the user inserted the station by then, it will get moved to the appropriate place.
                        # we return all the relevant metadata to ease the insert of the station in the database

                        otl = pyOTL.OceanLoading(StationCode, Config.options['grdtab'],
                                                 Config.options['otlgrid'])

                        # use the ppp coordinates to calculate the otl
                        coeff = otl.calculate_otl_coeff(x=ppp.x, y=ppp.y, z=ppp.z)

                        # add the file to the locks table so that it doesn't get processed over and over
                        # this will be removed by the user so that the file gets reprocessed once all the
                        # metadata is ready
                        cnn.insert('locks', filename=os.path.relpath(crinez, Config.repository_data_in))

                        return None, [StationCode, (ppp.x, ppp.y, ppp.z), coeff,
                                      (ppp.lat[0], ppp.lon[0], ppp.h[0]), crinez]

    except (pyRinex.pyRinexExceptionBadFile, pyRinex.pyRinexExceptionSingleEpoch,
            pyRinex.pyRinexExceptionNoAutoCoord) as e:

        reject_folder = reject_folder.replace('%reason%', 'bad_rinex')

        # add more verbose output
        e.event['Description'] = e.event['Description'] + '\n' + \
            os.path.relpath(crinez, Config.repository_data_in) + ': (file moved to ' + reject_folder + ')'
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy
        # error, move the file to the rejected folder
        error_handle(cnn, e.event, crinez, reject_folder, filename)

        return None, None

    except pyRinex.pyRinexException as e:

        retry_folder = retry_folder.replace('%reason%', 'rinex_issues')

        # add more verbose output
        e.event['Description'] = e.event['Description'] + '\n' + \
            os.path.relpath(crinez, Config.repository_data_in) + ': (file moved to ' + retry_folder + ')'
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy
        # error, move the file to the retry folder
        error_handle(cnn, e.event, crinez, retry_folder, filename)

        return None, None

    except pyPPP.pyRunPPPExceptionCoordConflict as e:

        retry_folder = retry_folder.replace('%reason%', 'coord_conflicts')

        e.event['Description'] = e.event['Description'].replace('%reason%', 'coord_conflicts')
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, retry_folder, filename)

        return None, None

    except pyPPP.pyRunPPPException as e:

        reject_folder = reject_folder.replace('%reason%', 'no_ppp_solution')

        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, reject_folder, filename)

        return None, None

    except pyStationInfo.pyStationInfoException as e:

        retry_folder = retry_folder.replace('%reason%', 'station_info_exception')

        e.event['Description'] = e.event['Description'] + '. The file will stay in the repository and will ' \
                                 'be processed during the next cycle of pyArchiveService.'
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, retry_folder, filename)

        return None, None

    except pyOTL.pyOTLException as e:

        retry_folder = retry_folder.replace('%reason%', 'otl_exception')

        e.event['Description'] = e.event['Description'] + ' while calculating OTL for %s. ' \
                                 'The file has been moved into the retry folder.' \
                                 % os.path.relpath(crinez, Config.repository_data_in)
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, retry_folder, filename)

        return None, None

    except pyProducts.pyProductsExceptionUnreasonableDate as e:
        # a bad RINEX file requested an orbit for a date < 0 or > now()
        reject_folder = reject_folder.replace('%reason%', 'bad_rinex')

        e.event['Description'] = e.event['Description'] + ' during %s. The file has been moved to the ' \
                                 'rejected folder. Most likely bad RINEX header/data.' \
                                 % os.path.relpath(crinez, Config.repository_data_in)
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, reject_folder, filename)

        return None, None

    except pyProducts.pyProductsException as e:
        # if PPP fails and ArchiveService tries to run sh_rx2apr and it doesn't find the orbits, send to retry
        retry_folder = retry_folder.replace('%reason%', 'sp3_exception')

        e.event['Description'] = e.event['Description'] + ': %s. Check the brdc/sp3/clk files and also check ' \
                                 'that the RINEX data is not corrupt.' \
                                 % os.path.relpath(crinez, Config.repository_data_in)
        e.event['StationCode'] = StationCode
        e.event['NetworkCode'] = '???'
        e.event['Year'] = year
        e.event['DOY'] = doy

        error_handle(cnn, e.event, crinez, retry_folder, filename)

        return None, None

    except dbConnection.dbErrInsert as e:

        reject_folder = reject_folder.replace('%reason%', 'duplicate_insert')

        # insert of duplicate values: two parallel processes tried to insert different filenames
        # (or the same) of the same station to the db: move it to the rejected folder.
        # The user might want to retry later. Log it in events
        # this case should be very rare
        event = pyEvents.Event(
            Description='Duplicate rinex insertion attempted while processing ' +
                        os.path.relpath(crinez, Config.repository_data_in) +
                        ' : (file moved to rejected folder)\n' + str(e),
            EventType='warn',
            StationCode=StationCode,
            NetworkCode='???',
            Year=year,
            DOY=doy)

        error_handle(cnn, event, crinez, reject_folder, filename)

        return None, None

    except Exception:

        retry_folder = retry_folder.replace('%reason%', 'general_exception')

        event = pyEvents.Event(
            Description=traceback.format_exc() + ' processing: ' +
                        os.path.relpath(crinez, Config.repository_data_in) + ' in node ' + platform.node() +
                        ' (file moved to retry folder)',
            EventType='error')

        error_handle(cnn, event, crinez, retry_folder, filename, no_db_log=True)

        return event['Description'], None

    return None, None
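
# parse_crinex_filename() is called at the top of process_crinex_file() but not defined in
# this section. The expected naming convention is stated elsewhere in this script:
# [stnm][doy][s].[yy]d.Z. A minimal regex sketch consistent with how fileparts is indexed
# above (an assumption; the real method may return additional groups):
import re

def parse_crinex_filename_sketch(filename):
    # groups: [0] = station code, [1] = day of year, [2] = session, [3] = two-digit year
    match = re.match(r'^([a-zA-Z0-9]{4})(\d{3})([a-zA-Z0-9])\.(\d{2})d\.Z$', filename)
    return match.groups() if match else ()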
def ExecuteGamit(cnn, JobServer, GamitConfig, stations, check_stations, ignore_missing, dates,
                 dry_run=False, create_kml=False):

    modules = ('pyRinex', 'datetime', 'os', 'shutil', 'pyBrdc', 'pySp3', 'subprocess', 're', 'pyETM',
               'glob', 'platform', 'traceback')

    tqdm.write(' >> %s Creating GAMIT session instances and executing GAMIT, please wait...'
               % print_datetime())

    sessions = []
    archive = pyArchiveStruct.RinexStruct(cnn)  # type: pyArchiveStruct.RinexStruct

    for date in tqdm(dates, ncols=80, disable=None):

        # make the dir for these sessions
        # this avoids a race condition when starting each process
        pwd = GamitConfig.gamitopt['solutions_dir'].rstrip('/') + '/' + date.yyyy() + '/' + date.ddd()

        if not os.path.exists(pwd):
            os.makedirs(pwd)

        net_object = Network(cnn, archive, GamitConfig, stations, date, check_stations, ignore_missing)

        sessions += net_object.sessions

        # Network outputs the sessions to be processed
        # submit them if they are not ready
        tqdm.write(' -- %s %i GAMIT sessions to submit (%i already processed)'
                   % (print_datetime(),
                      len([sess for sess in net_object.sessions if not sess.ready]),
                      len([sess for sess in net_object.sessions if sess.ready])))

    pbar = tqdm(total=len(sessions), disable=None, desc=' >> GAMIT sessions completion', ncols=100)

    # create the cluster for the run
    JobServer.create_cluster(run_gamit_session, (pyGamitTask.GamitTask,), gamit_callback, pbar,
                             modules=modules)

    for GamitSession in sessions:

        if not GamitSession.ready:
            # do not submit the task if the session is ready!
            # tqdm.write(' >> %s Init' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
            GamitSession.initialize()
            # tqdm.write(' >> %s Done Init' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
            task = pyGamitTask.GamitTask(GamitSession.remote_pwd, GamitSession.params,
                                         GamitSession.solution_pwd)
            # tqdm.write(' >> %s Done task' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
            JobServer.submit(task, task.params['DirName'], task.date.year, task.date.doy, dry_run)

            msg = 'Submitting for processing'
        else:
            msg = 'Session already processed'

        pbar.update()

        tqdm.write(' -- %s %s %s %s%02i -> %s'
                   % (print_datetime(), GamitSession.NetName, GamitSession.date.yyyyddd(),
                      GamitSession.org,
                      GamitSession.subnet if GamitSession.subnet is not None else 0, msg))

    if create_kml:
        # generate a KML of the sessions
        generate_kml(dates, sessions, GamitConfig)

    tqdm.write(' -- %s Done initializing and submitting GAMIT sessions' % print_datetime())

    # DDG: because of problems with keeping the database connection open (on some platforms), we invoke a
    # class that just performs a select on the database
    timer = DbAlive(cnn, 120)

    JobServer.wait()
    pbar.close()

    timer.stop()

    JobServer.close_cluster()

    return sessions
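
# DbAlive is instantiated above but not defined in this section. Its role, per the comment,
# is to keep the connection alive by periodically issuing a trivial SELECT. A minimal
# sketch of that pattern (an assumption; the project's class may differ):
import threading

class DbAliveSketch(object):
    def __init__(self, cnn, interval):
        self.cnn = cnn
        self.interval = interval              # seconds between keep-alive queries
        self.stop_event = threading.Event()
        self.thread = threading.Thread(target=self._run)
        self.thread.daemon = True
        self.thread.start()

    def _run(self):
        # wait() returns False on timeout, so the loop runs until stop() is called
        while not self.stop_event.wait(self.interval):
            # a trivial query just to keep the connection from idling out
            self.cnn.query('SELECT 1')

    def stop(self):
        self.stop_event.set()
        self.thread.join()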
def main():

    global cnn

    parser = argparse.ArgumentParser(description='Parallel.GAMIT main execution program')

    parser.add_argument('session_cfg', type=str, nargs=1, metavar='session.cfg',
                        help="Filename with the session configuration to run Parallel.GAMIT")

    parser.add_argument('-d', '--date', type=str, nargs=2, metavar='{date}',
                        help="Date range to process. Can be specified in yyyy/mm/dd yyyy_doy wwww-d format")

    parser.add_argument('-dp', '--date_parser', type=str, nargs=2, metavar='{year} {doys}',
                        help="Parse date using ranges and commas (e.g. 2018 1,3-6). "
                             "Cannot cross year boundaries")

    parser.add_argument('-e', '--exclude', type=str, nargs='+', metavar='station',
                        help="List of stations to exclude from this processing (e.g. -e igm1 lpgs vbca)")

    parser.add_argument('-p', '--purge', action='store_true',
                        help="Purge year doys from the database and directory structure and re-run the "
                             "solution.")

    parser.add_argument('-dry', '--dry_run', action='store_true',
                        help="Generate the directory structures (locally) but do not run GAMIT. "
                             "Output is left in the production directory.")

    parser.add_argument('-kml', '--generate_kml', action='store_true',
                        help="Generate KML and exit without running GAMIT.")

    parser.add_argument('-np', '--noparallel', action='store_true',
                        help="Execute command without parallelization.")

    args = parser.parse_args()

    dates = None
    drange = None

    try:
        if args.date_parser:
            year = int(args.date_parser[0])
            doys = parseIntSet(args.date_parser[1])

            if any([doy for doy in doys if doy < 1]):
                parser.error('DOYs cannot start with zero. Please select a DOY range between 1-365/366')

            if 366 in doys:
                if year % 4 != 0:
                    parser.error('Year ' + str(year) + ' is not a leap year: DOY 366 does not exist.')

            dates = [pyDate.Date(year=year, doy=i) for i in doys]
            drange = [dates[0], dates[-1]]
        else:
            drange = process_date(args.date, missing_input=None)

            if not all(drange):
                parser.error('Must specify a start and end date for the processing.')

            # get the dates to purge
            dates = [pyDate.Date(mjd=i) for i in range(drange[0].mjd, drange[1].mjd + 1)]

    except ValueError as e:
        parser.error(str(e))

    print(' >> Reading configuration files and creating project network, please wait...')

    GamitConfig = pyGamitConfig.GamitConfiguration(args.session_cfg[0])  # type: pyGamitConfig.GamitConfiguration

    print(' >> Checking GAMIT tables for requested config and year, please wait...')

    JobServer = pyJobServer.JobServer(GamitConfig,
                                      check_gamit_tables=(pyDate.Date(year=drange[1].year,
                                                                      doy=drange[1].doy),
                                                          GamitConfig.gamitopt['eop_type']),
                                      run_parallel=not args.noparallel,
                                      software_sync=GamitConfig.gamitopt['gamit_remote_local'])

    cnn = dbConnection.Cnn(GamitConfig.gamitopt['gnss_data'])  # type: dbConnection.Cnn

    # to exclude stations, append them to GamitConfig.NetworkConfig with a - in front
    exclude = args.exclude
    if exclude is not None:
        print(' >> User selected list of stations to exclude:')
        Utils.print_columns(exclude)
        GamitConfig.NetworkConfig['stn_list'] += ',-' + ',-'.join(exclude)

    dry_run = args.dry_run if args.dry_run is not None else False

    if not dry_run:
        # ignored when calling a dry run
        # purge solutions if requested
        purge_solutions(JobServer, args, dates, GamitConfig)

    # initialize the stations in the project
    stations = station_list(cnn, GamitConfig.NetworkConfig, drange)

    tqdm.write(' >> Creating GAMIT session instances, please wait...')

    sessions = []
    archive = pyArchiveStruct.RinexStruct(cnn)  # type: pyArchiveStruct.RinexStruct

    for date in tqdm(dates, ncols=80):

        # make the dir for these sessions
        # this avoids a race condition when starting each process
        pwd = GamitConfig.gamitopt['solutions_dir'].rstrip('/') + '/' + date.yyyy() + '/' + date.ddd()

        if not os.path.exists(pwd):
            os.makedirs(pwd)

        net_object = Network(cnn, archive, GamitConfig, stations, date)

        sessions += net_object.sessions

    if args.generate_kml:
        # generate a KML of the sessions
        generate_kml(dates, sessions, GamitConfig)
        exit()

    # print a summary of the current project (NOT VERY USEFUL AFTER ALL)
    # print_summary(stations, sessions, drange)

    # run the job server
    ExecuteGamit(sessions, JobServer, dry_run)

    # execute globk on doys that had to be divided into subnets
    if not args.dry_run:
        ExecuteGlobk(GamitConfig, sessions, dates)

    # parse the zenith delay outputs
    ParseZTD(GamitConfig.NetworkConfig.network_id, sessions, GamitConfig)

    print(' >> Done processing and parsing information. Successful exit from Parallel.GAMIT')
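
# parseIntSet() is called in main() but not defined in this section. It parses expressions
# like '1,3-6' into day-of-year integers. A minimal sketch (an assumption; the project's
# helper may validate input more aggressively):
def parseIntSet_sketch(text):
    doys = set()
    for part in text.split(','):
        if '-' in part:
            lo, hi = part.split('-')
            doys.update(range(int(lo), int(hi) + 1))
        else:
            doys.add(int(part))
    # return a sorted list so dates[0]/dates[-1] bound the range, as main() expects
    return sorted(doys)

# e.g. parseIntSet_sketch('1,3-6') -> [1, 3, 4, 5, 6]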