def main(user_args):
    pds_id = user_args.pdsid
    delete_from_di = user_args.di
    delete_from_upc = user_args.upc

    if delete_from_di:
        pds_session_maker, pds_engine = db_connect(pds_db)
        pds_session = pds_session_maker()

        query_res = pds_session.query(Files).filter(
            Files.filename.contains(pds_id))
        num_pds_queries = len(list(query_res))

        while True:
            print(f'You will be deleting {num_pds_queries} records from the di '
                  f"database {credentials[pds_db]['db']}")
            user_answer = input('Are you sure?[Y/N]:')
            if user_answer == 'Y' or user_answer == 'N':
                break
            print(f'Invalid input: {user_answer}')

        if user_answer == 'Y':
            for record in query_res:
                pds_session.delete(record)
            pds_session.commit()

        pds_session.close()

    if delete_from_upc:
        upc_session_maker, upc_engine = db_connect(upc_db)
        upc_session = upc_session_maker()

        query_res = upc_session.query(DataFiles).filter(
            DataFiles.productid == pds_id)
        num_upc_queries = len(list(query_res))

        while True:
            print(f'You will be deleting {num_upc_queries} records from the upc '
                  f"database {credentials[upc_db]['db']}")
            user_answer = input('Are you sure?[Y/N]:')
            if user_answer == 'Y' or user_answer == 'N':
                break
            print(f'Invalid input: {user_answer}')

        if user_answer == 'Y':
            for record in query_res:
                upc_session.delete(record)
            upc_session.commit()

        upc_session.close()
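
# The Y/N confirmation loop appears twice in main() above; a hypothetical
# helper like confirm() (not part of the original script) could factor it out:
def confirm(prompt):
    """Prompt until the user types exactly 'Y' or 'N'; True means 'Y'."""
    while True:
        answer = input(prompt)
        if answer in ('Y', 'N'):
            return answer == 'Y'
        print(f'Invalid input: {answer}')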
def main(user_args):
    date = user_args.date
    instrument = user_args.instrument
    spacecraft = user_args.spacecraft
    database = user_args.database
    target = user_args.target
    loglevel = user_args.loglevel
    statistics = user_args.statistics

    logging.basicConfig(level=loglevel)

    source_session_maker, _ = db_connect(upc_db)
    source_session = source_session_maker()

    upc_ids = get_upc_ids(source_session, date, instrument, spacecraft, target)

    if statistics:
        get_stats(source_session, upc_ids)
        return

    targets = ['public_mars', 'public_moon', 'public_other']
    # targets = ['upcdev_mars', 'upcdev_moon', 'upcdev_other']

    target_sessions = {}
    for target in targets:
        Session, _ = db_connect(target)
        session = Session()
        target_sessions[target] = session

    instrument = get_instrument(source_session, instrument, spacecraft)
    for target_session in target_sessions.values():
        sync_instrument(target_session, instrument)

    instrument_keywords = get_keywords(source_session, instrument, spacecraft)
    common_keywords = get_keywords(source_session, 'COMMON')

    # Sync keywords, instruments with all databases
    for target_session in target_sessions.values():
        sync_keywords(target_session, instrument_keywords)
        sync_keywords(target_session, common_keywords)
        target_session.commit()

    meta_types = [MetaTime, MetaString, MetaGeometry, MetaBoolean,
                  MetaBands, MetaPrecision, MetaInteger]

    for upc_id in upc_ids:
        sync_upc_id(source_session, target_sessions, upc_id, meta_types)
def __init__(self, process_name, archive, volume=None, search=None,
             log_level='INFO', namespace=None):
    self.process_name = process_name
    self.archive = archive
    self.logger = self.get_logger(log_level)
    self.archive_info = json.load(open(pds_info, 'r'))

    try:
        self.archive_id = self.get_archive_att('archiveid')
    except KeyError:
        self.logger.error("Archive %s not found in %s", archive, pds_info)
        raise

    self.volume = volume
    self.search = search
    self.namespace = namespace

    self.ready_queue = RedisQueue(f"{process_name}_ReadyQueue", namespace)
    self.error_queue = RedisQueue(f"{process_name}_ErrorQueue", namespace)
    self.logger.info("%s queue: %s", process_name, self.ready_queue.id_name)

    try:
        pds_session_maker, _ = db_connect(pds_db)
        self.logger.info('Database Connection Success')
    except Exception as e:
        self.logger.error('Database Connection Error\n\n%s', e)
        raise

    self.session_maker = pds_session_maker
def session(tables, request):
    Session, _ = db_connect('upc_test')
    session = Session()

    def cleanup():
        session.rollback()  # Necessary because some tests intentionally fail
        for t in reversed(tables):
            # Skip the srid table
            if t != 'spatial_ref_sys':
                session.execute(f'TRUNCATE TABLE {t} CASCADE')
                # Reset the autoincrementing
                if t in ['datafiles', 'instruments', 'targets']:
                    if t == 'datafiles':
                        column = f'{t}_upcid_seq'
                    if t == 'instruments':
                        column = f'{t}_instrumentid_seq'
                    if t == 'targets':
                        column = f'{t}_targetid_seq'
                    session.execute(f'ALTER SEQUENCE {column} RESTART WITH 1')
        session.commit()

    request.addfinalizer(cleanup)
    return session
def main():
    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('DI_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
    except Exception as e:
        print(e)
        return 1

    for target in PDS_info:
        archive_id = PDS_info[target]['archiveid']
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")
        expired = archive_expired(session, archive_id, testing_date)
        # If any files within the archive are expired, send them to the queue
        if expired.count():
            # @TODO get rid of print statements or enable with --verbose?
            for f in expired:
                redis_queue.QueueAdd((f.filename, target))
            print('Archive {} DI Ready: {} Files'.format(
                target, str(expired.count())))
        else:
            print('Archive {} DI Current'.format(target))
    return 0
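
# The strftime/strptime round trip above just yields a naive datetime
# truncated to whole seconds; an equivalent, more direct computation
# (a sketch, assuming the same second-resolution comparison is intended):
import datetime
import pytz

testing_date = (datetime.datetime.now(pytz.utc)
                - datetime.timedelta(days=30)).replace(microsecond=0, tzinfo=None)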
def main():
    args = Args()
    args.parse_args()

    RQ = RedisQueue('ChecksumUpdate_Queue')

    # @TODO Remove/replace "archiveID"
    archiveID = {
        'cassiniISS': 'cassini_iss_edr',
        'mroCTX': 16,
        'mroHIRISE_EDR': '124',
        'LROLRC_EDR': 74
    }

    # ********* Set up logging *************
    logger = logging.getLogger('ChecksumUpdate_Queueing.' + args.archive)
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s Checksum update Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except Exception:
        logger.error('Database Connection: Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive],
            Files.filename.like(volstr))
    else:
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive])

    addcount = 0
    for element in QueryOBJ:
        try:
            RQ.QueueAdd(element.filename)
            addcount = addcount + 1
        except Exception:
            logger.error('File %s Not Added to DI_ReadyQueue', element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('DI Queueing Complete')
def __init__(self, database):
    """
    Parameters
    ----------
    database : str
    """
    if database == "JOBS":
        self.session_maker, self.engine = db_connect(cluster_db)
        Base = automap_base()
        Base.prepare(self.engine, reflect=True)
        self.processingTAB = Base.classes.processing
    elif database == "DI":
        self.session_maker, _ = db_connect(pds_db)
        self.files = Files
        self.archives = Archives
        self.DB_files = self.files
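
# The "JOBS" branch above relies on SQLAlchemy automap to reflect the
# `processing` table into a mapped class at runtime. A minimal standalone
# sketch of the same pattern; the connection URL is a placeholder, not a
# value from the original code.
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import sessionmaker

engine = create_engine('postgresql://user:pass@localhost/jobs')
Base = automap_base()
Base.prepare(engine, reflect=True)  # pre-2.0 SQLAlchemy spelling, as used above
Processing = Base.classes.processing  # mapped class generated from the table

session = sessionmaker(bind=engine)()
first_job = session.query(Processing).first()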
def create_pds_database():
    try:
        Session, engine = db_connect(pds_db)
    except Exception:
        Session = None
        engine = None

    if isinstance(Session, sqlalchemy.orm.sessionmaker):
        # Create the database
        if not database_exists(engine.url):
            create_database(engine.url, template='template_postgis')  # This is a hardcode to the local template

        Base.metadata.bind = engine
        # If the table does not exist, this will create it. This is used in case a
        # user has manually dropped a table so that the project is not wrecked.
        Base.metadata.create_all(tables=[
            ProcessRuns.__table__, Files.__table__, Archives.__table__
        ])

        with open(pds_info, 'r') as fp:
            data = json.load(fp)

        i = 0
        archive_list = []
        archive_ids = []
        unsupported_args = [
            'bandbinQuery', 'FilterName', 'upc_reqs', 'mission', 'bandorder'
        ]
        for key, value in data.items():
            value['archive_name'] = key
            value['missionid'] = i
            value['pds_archive'] = True
            value['primary_node'] = 'USGS'
            value.pop('path')
            for arg in unsupported_args:
                # pop with a default tolerates archives that lack the keyword
                value.pop(arg, None)
            if value['archiveid'] not in archive_ids:
                archive_ids.append(value['archiveid'])
                archive_list.append(Archives(**value))
            i += 1

        session = Session()
        try:
            session.add_all(archive_list)
            session.commit()
        except sqlalchemy.exc.IntegrityError as e:
            print("Not adding new archives to Archives table, as the table is likely "
                  "already populated. This is a result of the following: \n\n{}".format(e))
        session.close()
def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('UPC_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    # logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Process.log')
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    RQ = RedisQueue('UPC_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
        print('Database Connection Success')
    except Exception as e:
        print(e)
        print('Database Connection Error')
        return 1  # A failed connection leaves `session` undefined below

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[args.archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, args.archive))
            addcount = addcount + 1
        logger.info('Files Added to UPC Queue: %s', addcount)

    print("Done")
def main(user_args):
    log_level = user_args.log_level

    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('UPC_ReadyQueue')

    logger = logging.getLogger('UPC_Queueing')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("UPC Queue: %s", redis_queue.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
    except Exception as e:
        logger.error("%s", e)
        return 1

    # For each archive in the db, test if there are files that are ready to
    # process
    for archive_id in session.query(Files.archiveid).distinct():
        result = session.query(Files).filter(Files.archiveid == archive_id,
                                             Files.upc_required == 't')

        # Get filepath from archive id
        archive_name = session.query(Archives.archive_name).filter(
            Archives.archiveid == archive_id).first()

        # No archive name = no path. Skip these values.
        if archive_name is None:
            logger.warning("No archive name found for archive id: %s", archive_id)
            continue

        try:
            # Since results are returned as lists, we have to access the 0th
            # element to pull out the string archive name.
            fpath = PDS_info[archive_name[0]]['path']
        except KeyError:
            logger.warning("Unable to locate file path for archive id %s",
                           archive_id)
            continue

        # Add each file in the archive to the redis queue.
        for element in result:
            fname = fpath + element.filename
            fid = element.fileid
            redis_queue.QueueAdd((fname, fid, archive_name[0]))
        logger.info("Added %s files from %s", result.count(), archive_name)
    return 0
def main():
    path = cfg.pow_map2_base

    session, _ = db_connect('clusterjob_prd')

    id2task = map_type_ids(session)

    try:
        n_days = sys.argv[1]
    except IndexError:
        n_days = 14

    old_files = get_old_keys(session)
    remove(path, old_files, id2task, session)
    return 0
def main():
    args = Args()
    args.parse_args()

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    logger = logging.getLogger('DI_Ready.' + args.archive)
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    try:
        # Throws away 'engine' information
        session, _ = db_connect(pds_db)
        logger.info("%s", args.archive)
        logger.info("Database Connection Success")
    except Exception as e:
        logger.error("%s", e)
    else:
        # If the db connection fails, there's no sense in doing this part
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

        if args.volume:
            expired = volume_expired(session, archiveID, args.volume,
                                     testing_date)
            if expired.count():
                logger.info("Volume %s DI Ready: %s Files",
                            args.volume, str(expired.count()))
            else:
                logger.info('Volume %s DI Current', args.volume)
        else:
            expired = archive_expired(session, archiveID, testing_date)
            if expired.count():
                logger.info('Archive %s DI Ready: %s Files',
                            args.archive, str(expired.count()))
            else:
                logger.info('Archive %s DI Current', args.archive)
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    search = user_args.search
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[archive]['archiveid']

    RQ = RedisQueue('Browse_ReadyQueue')
    logger.info("Browse Queue: %s", RQ.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except Exception:
        logger.error('Database Connection Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, archive))
            addcount = addcount + 1
        logger.info('Files Added to Browse Queue: %s', addcount)
def main(user_args):
    if user_args.path:
        path = user_args.path
    else:
        path = summaries_path

    Session, _ = db_connect(upc_db)
    session = Session()

    for key in queries:
        # Wrap each summary query in a CTE and aggregate its rows to JSON
        json_query = "with t AS ({}) SELECT json_agg(t) FROM t;".format(queries[key])
        output = session.execute(json_query)
        json_output = json.dumps([dict(line) for line in output])
        with open(path + key + ".json", "w") as json_file:
            json_file.write(json_output)
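
# For illustration, a hypothetical `queries` entry (the real dict is defined
# elsewhere) shows the SQL the loop above generates: the CTE wrapper plus
# json_agg() collapses the whole result set into one JSON array, so the
# Python side only serializes a single row.
example_queries = {'targets': 'SELECT targetname, system FROM targets'}
for key in example_queries:
    json_query = "with t AS ({}) SELECT json_agg(t) FROM t;".format(example_queries[key])
    print(json_query)
    # -> with t AS (SELECT targetname, system FROM targets) SELECT json_agg(t) FROM t;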
def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('Thumbnail_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[args.archive]['archiveid']

    RQ = RedisQueue('Thumbnail_ReadyQueue')

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except Exception:
        logger.error('Database Connection Error')
        return 1  # A failed connection leaves `session` undefined below

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[args.archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, args.archive))
            addcount = addcount + 1
        logger.info('Files Added to Thumbnail Queue: %s', addcount)
def main():
    args = Args()
    args.parse_args()

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    try:
        # Throws away 'engine' information
        session, _ = db_connect(pds_db)
        print(args.archive)
        print('Database Connection Success')
    except Exception as e:
        print(e)
    else:
        # If the db connection fails, there's no sense in doing this part
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

        if args.volume:
            expired = volume_expired(session, archiveID, args.volume,
                                     testing_date)
            if expired.count():
                print('Volume {} DI Ready: {} Files'.format(
                    args.volume, str(expired.count())))
            else:
                print('Volume {} DI Current'.format(args.volume))
        else:
            expired = archive_expired(session, archiveID, testing_date)
            if expired.count():
                print('Archive {} DI Ready: {} Files'.format(
                    args.archive, str(expired.count())))
            else:
                print('Archive {} DI Current'.format(args.archive))
def main():
    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('UPC_ReadyQueue')

    try:
        # Safe to use prd database here because there are no writes/edits.
        session, _ = db_connect(pds_db)
    # @TODO Catch exceptions by type. Bad practice to 'except Exception,' but
    # I don't know what exception could happen here.
    except Exception as e:
        print(e)
        return 1

    # For each archive in the db, test if there are files that are ready to
    # process
    for archive_id in session.query(Files.archiveid).distinct():
        result = session.query(Files).filter(Files.archiveid == archive_id,
                                             Files.upc_required == 't')

        # Get filepath from archive id
        archive_name = session.query(Archives.archive_name).filter(
            Archives.archiveid == archive_id).first()

        # No archive name = no path. Skip these values.
        if archive_name is None:
            # @TODO log an error
            continue

        try:
            # Since results are returned as lists, we have to access the 0th
            # element to pull out the string archive name.
            fpath = PDS_info[archive_name[0]]['path']
        except KeyError:
            continue

        # Add each file in the archive to the redis queue.
        for element in result:
            fname = fpath + element.filename
            fid = element.fileid
            redis_queue.QueueAdd((fname, fid, archive_name[0]))
    return 0
def session(tables, request):
    Session, _ = db_connect('upc_test')
    session = Session()

    models.Targets.create(session, targetname='FAKE_TARGET',
                          system='FAKE_SYSTEM')
    models.Instruments.create(session, instrument='FAKE_CAMERA',
                              spacecraft='FAKE_CRAFT')
    models.DataFiles.create(session, instrumentid=1, targetid=1,
                            source='/Path/to/pds/file.img')
    models.SearchTerms.create(session, upcid=1, instrumentid=1, targetid=1)
    models.JsonKeywords.create(session, upcid=1)
    session.commit()
    session.flush()

    def cleanup():
        session.rollback()  # Necessary because some tests intentionally fail
        for t in reversed(tables):
            # Skip the srid table
            if t != 'spatial_ref_sys':
                session.execute(f'TRUNCATE TABLE {t} CASCADE')
                # Reset the autoincrementing
                if t in ['datafiles', 'instruments', 'targets']:
                    if t == 'datafiles':
                        column = f'{t}_upcid_seq'
                    if t == 'instruments':
                        column = f'{t}_instrumentid_seq'
                    if t == 'targets':
                        column = f'{t}_targetid_seq'
                    session.execute(f'ALTER SEQUENCE {column} RESTART WITH 1')
        session.commit()

    request.addfinalizer(cleanup)
    return session
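
# A sketch of a test consuming the seeded `session` fixture above; the test
# name is hypothetical, but the expected values follow from the rows the
# fixture creates (sequences restart at 1 after each cleanup).
def test_seeded_datafile(session):
    datafile = session.query(models.DataFiles).first()
    assert datafile.upcid == 1
    assert datafile.source == '/Path/to/pds/file.img'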
def main():
    Session, _ = db_connect(upc_db)
    path = summaries_path

    with session_scope(Session) as session:
        print("Creating Hist Table")
        session.execute(query)

        total_rows = session.execute(
            "SELECT count(*) FROM histogram_summary;").first()[0]

        page_number = 0
        number_of_rows_per_page = 200000
        complete_json_output = []
        print("Paging hist results")
        while True:
            lower_bound = page_number * number_of_rows_per_page
            upper_bound = lower_bound + number_of_rows_per_page

            # Clamp the final page to the rows that remain
            if upper_bound > total_rows:
                number_of_rows_per_page = total_rows - lower_bound

            json_query = ("with t AS (SELECT * FROM histogram_summary "
                          "LIMIT {} OFFSET {}) SELECT json_agg(t) FROM t;").format(
                              number_of_rows_per_page, lower_bound)
            output = session.execute(json_query).fetchall()
            complete_json_output.extend([dict(line) for line in output])
            page_number += 1

            if upper_bound > total_rows:
                break
        print("Finished view generation")

        print("Writing Json")
        json_output = json.dumps(complete_json_output)
        # Note: append mode -- an existing file will accumulate multiple
        # JSON documents rather than being replaced
        with open(path + "histogram_summary.json", "a") as json_file:
            json_file.write(json_output)
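
# The LIMIT/OFFSET windowing above, shown on a toy row count with no
# database; the final page is clamped to the remaining rows.
total_rows = 450_000
rows_per_page = 200_000
page = 0
while True:
    lower = page * rows_per_page
    upper = lower + rows_per_page
    if upper > total_rows:
        rows_per_page = total_rows - lower  # clamp the last page
    print(f"LIMIT {rows_per_page} OFFSET {lower}")
    page += 1
    if upper > total_rows:
        break
# LIMIT 200000 OFFSET 0
# LIMIT 200000 OFFSET 200000
# LIMIT 50000 OFFSET 400000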
def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('DI_Queueing')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('DI_ReadyQueue')
    logger.info("DI Queue: %s", redis_queue.id_name)

    try:
        session, _ = db_connect(pds_db)
    except Exception as e:
        logger.error("%s", e)
        return 1

    for target in PDS_info:
        archive_id = PDS_info[target]['archiveid']
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")
        expired = archive_expired(session, archive_id, testing_date)
        # If any files within the archive are expired, send them to the queue
        if expired.count():
            for f in expired:
                redis_queue.QueueAdd((f.filename, target))
            logger.info('Archive %s DI Ready: %s Files',
                        target, str(expired.count()))
        else:
            logger.info('Archive %s DI Current', target)
    return 0
def main():
    args = Args()
    args.parse_args()

    key = args.key
    if key is None:
        print("No key specified.\nUsage:\t python clear_timestamps.py -k <job_key>")
        exit(1)

    Session, engine = db_connect('clusterjob_prd')
    session = Session()

    record = session.query(Processing).filter(Processing.key == key).first()
    # .first() returns None when the key is unknown; guard before mutating
    if record is None:
        print("No processing record found for key {}".format(key))
        exit(1)

    record.queued = None
    record.started = None
    record.finished = None
    record.accessed = None
    record.notified = None
    record.purged = None

    session.merge(record)
    session.flush()
    session.commit()
class JsonKeywords(BaseMixin, Base):
    __tablename__ = "json_keywords"
    upcid = Column(Integer, ForeignKey('datafiles.upcid'), primary_key=True)
    jsonkeywords = Column(MutableDict.as_mutable(JSONB))


class_map = {
    'datafiles': DataFiles,
    'instruments': Instruments,
    'targets': Targets,
    'search_terms': SearchTerms
}

try:
    Session, engine = db_connect(upc_db)
except Exception:
    Session = None
    engine = None

if isinstance(Session, sqlalchemy.orm.sessionmaker):
    # Create the database
    if not database_exists(engine.url):
        create_database(engine.url, template='template_postgis')  # This is a hardcode to the local template

    Base.metadata.bind = engine
    # If the table does not exist, this will create it. This is used in case a
    # user has manually dropped a table so that the project is not wrecked.
    Base.metadata.create_all(tables=[
        # Table list inferred from the model classes mapped above
        DataFiles.__table__, Instruments.__table__, Targets.__table__,
        SearchTerms.__table__, JsonKeywords.__table__
    ])
def main(user_args):
    log_level = user_args.log_level

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # Set up logging
    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting DI Process')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception as e:
        logger.error('Database Connection Error: %s', str(e))
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ.id_name: '1'})
    index = 0

    logger.info("DI Queue: %s", RQ.id_name)

    while int(RQ.QueueSize()) > 0 and RQ_lock.available(RQ.id_name):
        item = literal_eval(RQ.QueueGet())
        inputfile = item[0]
        archive = item[1]
        logger.debug("%s - %s", inputfile, archive)
        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception as e:
            logger.warning('Filename query failed for inputfile %s: %s',
                           inputfile, str(e))
            continue

        archive_path = PDSinfoDICT[archive]['path']

        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            # Hash the file in 4096-byte chunks to avoid loading it whole
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            Qelement.di_pass = checksum == Qelement.checksum
            if not Qelement.di_pass:
                logger.warning('File %s checksum %s does not match the '
                               'database entry checksum %s',
                               cpfile, checksum, Qelement.checksum)

            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during DI processing')
            logger.warning('File %s Not Found', cpfile)

    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception as e:
        logger.warning("Unable to commit changes to database\n\n%s", e)
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()
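
# The chunked-MD5 pattern above also appears in the ingest process below; it
# could be factored into a helper like this hypothetical md5_of() (a sketch,
# not part of the original module):
import hashlib

def md5_of(path, chunk_size=4096):
    """Hex MD5 of a file, read in fixed-size chunks so large archive
    products never need to fit in memory."""
    f_hash = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            f_hash.update(chunk)
    return f_hash.hexdigest()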
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    jobarray = user_args.jobarray
    log_level = user_args.log_level

    RQ = RedisQueue('DI_ReadyQueue')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    logger = logging.getLogger('DI_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("DI Queue: %s", RQ.id_name)
    logger.info('Starting %s DI Queueing', archive)
    if volume:
        logger.info('Queueing %s Volume', volume)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception:
        logger.error('Database Connection: Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        vol_exists = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).first()

        if not vol_exists:
            print(f"No files exist in the database for volume \"{volume}\"."
                  " Either the volume does not exist or it has not been properly ingested.\n")
            exit()

    td = (datetime.datetime.now(pytz.utc)
          - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    if volume:
        volstr = '%' + volume + '%'
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))
    else:
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, archive))
            addcount = addcount + 1
        except Exception:
            logger.warning('File %s Not Added to DI_ReadyQueue', element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('DI Queueing Complete')
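
# Why the query above uses .is_(None): a plain Python `is None` on a column
# expression is evaluated eagerly in Python (always False) and never reaches
# the SQL layer. A standalone sketch (SQLAlchemy 1.4+ import spelling):
from sqlalchemy import Column, Date, Integer
from sqlalchemy.orm import declarative_base

ExampleBase = declarative_base()

class Thing(ExampleBase):
    __tablename__ = 'things'
    id = Column(Integer, primary_key=True)
    di_date = Column(Date)

print(Thing.di_date is None)    # False -- plain Python identity test
print(Thing.di_date.is_(None))  # things.di_date IS NULL
print(Thing.di_date == None)    # things.di_date IS NULL (overloaded __eq__)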
                     uselist=True, cascade="save-update, merge, delete, delete-orphan")


class Archives(BaseMixin, Base):
    __tablename__ = 'archives'
    archiveid = Column(Integer, primary_key=True, autoincrement=True)
    archive_name = Column(String(1024))
    missionid = Column(Integer)
    pds_archive = Column(Boolean)
    primary_node = Column(String(64))
    file = relationship('Files', backref='archives', uselist=False)


try:
    Session, engine = db_connect(pds_db)
except Exception:
    Session = None
    engine = None

if isinstance(Session, sqlalchemy.orm.sessionmaker):
    # Create the database
    if not database_exists(engine.url):
        create_database(engine.url, template='template_postgis')  # This is a hardcode to the local template

    Base.metadata.bind = engine
    # If the table does not exist, this will create it. This is used in case a
    # user has manually dropped a table so that the project is not wrecked.
    Base.metadata.create_all(tables=[
        # Table list assumed to match create_pds_database() earlier in this section
        ProcessRuns.__table__, Files.__table__, Archives.__table__
    ])
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)

    persist = user_args.persist
    log_level = user_args.log_level
    namespace = user_args.namespace

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id': slurm_array_id,
               'inputfile': inputfile}

    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_UpdateQueue', namespace)
    RQ_work = RedisQueue('UPC_UpdateWorkQueue', namespace)
    logger.info("UPC Update Queue: %s", RQ_main.id_name)

    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # While there are items in the redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # Get a file from the queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        item_list = literal_eval(item)
        inputfile = item_list[0]
        archive = item_list[1]
        failing_command = item_list[2]
        update_type = item_list[3]
        upc_id = None

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC update')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source
        edr_source = inputfile.replace(workarea, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        try:
            session = upc_session_maker()
            session.close()
        except TypeError as e:
            logger.error("Unable to create a database session/connection to the upc database: %s", e)
            raise e

        try:
            if update_type.lower() == 'upc':
                recipe_file = recipe_base + "/" + archive + '.json'
                no_extension_inputfile = os.path.splitext(inputfile)[0]
                cam_info_file = no_extension_inputfile + '_caminfo.pvl'
                footprint_file = no_extension_inputfile + '_footprint.json'
                catlab_output = no_extension_inputfile + '_catlab.pvl'

                with open(recipe_file) as fp:
                    upc_json = json.load(fp)['upc']

                # Attempt to get the optional search_term_mapping for the upc
                # process
                try:
                    search_term_mapping = upc_json['search_term_mapping']
                except KeyError:
                    search_term_mapping = {}

                # Some datasets with attached PDS labels cause PVL to hang,
                # so the recipe includes a call to dump the label using `catlab`.
                # If present, use the catlab output as pds_label instead of inputfile
                if os.path.exists(catlab_output):
                    pds_label = pvl.load(catlab_output)
                else:
                    pds_label = pvl.load(inputfile)

                instrument_name = get_instrument_name(pds_label)
                spacecraft_name = get_spacecraft_name(pds_label)
                target_name = get_target_name(pds_label)

                with session_scope(upc_session_maker) as session:
                    target_qobj = Targets.create(session, targetname=target_name,
                                                 displayname=target_name.title(),
                                                 system=target_name)
                    target_id = target_qobj.targetid

                with session_scope(upc_session_maker) as session:
                    instrument_qobj = Instruments.create(session,
                                                         instrument=instrument_name,
                                                         spacecraft=spacecraft_name)
                    instrument_id = instrument_qobj.instrumentid

                ######## Generate DataFiles Record ########
                datafile_attributes = create_datafiles_atts(pds_label, edr_source,
                                                            no_extension_inputfile + '.cub')
                datafile_attributes['instrumentid'] = instrument_id
                datafile_attributes['targetid'] = target_id

                with session_scope(upc_session_maker) as session:
                    datafile_qobj = DataFiles.create(session, **datafile_attributes)
                    upc_id = datafile_qobj.upcid

                ######## Generate SearchTerms Record ########
                search_term_attributes = create_search_terms_atts(
                    cam_info_file, upc_id, no_extension_inputfile + '.cub',
                    footprint_file, search_term_mapping)
                search_term_attributes['targetid'] = target_id
                search_term_attributes['instrumentid'] = instrument_id

                with session_scope(upc_session_maker) as session:
                    SearchTerms.create(session, **search_term_attributes)

                ######## Generate JsonKeywords Record ########
                json_keywords_attributes = create_json_keywords_atts(
                    cam_info_file, upc_id, inputfile, failing_command, logger)

                with session_scope(upc_session_maker) as session:
                    JsonKeywords.create(session, **json_keywords_attributes)

            # Derived Processing:

            # If we don't have a upcid, get the matching ID from the database
            if not upc_id:
                with session_scope(upc_session_maker) as session:
                    src = inputfile.replace(workarea, web_base)
                    datafile = session.query(DataFiles).filter(
                        or_(DataFiles.source == src,
                            DataFiles.detached_label == src)).first()
                    if not datafile:
                        RQ_error.QueueAdd(f'No matching upcid was found for {inputfile}, '
                                          'derived product paths could not be added')
                        logger.warning('No matching upcid was found for %s, '
                                       'derived product paths could not be added',
                                       inputfile)
                    else:
                        upc_id = datafile.upcid

            final_path = makedir(inputfile)
            src = os.path.splitext(inputfile)[0]
            derived_product = os.path.join(
                final_path, os.path.splitext(os.path.basename(inputfile))[0])

            # If derived products exist, copy them to the derived area and add the path to the db
            try:
                shutil.move(src + '.browse.jpg', derived_product + '.browse.jpg')
                shutil.move(src + '.thumbnail.jpg', derived_product + '.thumbnail.jpg')
                add_url(derived_product, upc_id, upc_session_maker)
            except FileNotFoundError:
                RQ_error.QueueAdd(f'Unable to locate or access derived products for {inputfile}')
                logger.warning('Unable to locate or access derived products for %s',
                               inputfile)

            if not persist:
                # Remove all files from the workarea except for the copied
                # source file
                file_prefix = os.path.splitext(inputfile)[0]
                workarea_files = glob(file_prefix + '*')
                # os.remove(os.path.join(workarea, 'print.prt'))
                for file in workarea_files:
                    os.remove(file)

        # Handle SQL specific database errors
        except SQLAlchemyError as e:
            logger.error("Database operation failed: %s \nRequeueing (%s, %s)",
                         e, inputfile, archive)
            RQ_main.QueueAdd((inputfile, archive, failing_command, update_type))
            raise e

        RQ_work.QueueRemove(item)

    # Disconnect from the engines
    upc_engine.dispose()
def session_maker(tables, request):
    Session, _ = db_connect('upc_test')
    return Session
def main(user_args):
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('Browse_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    pds_session_maker, pds_engine = db_connect(pds_db)
    pds_session = pds_session_maker()

    upc_session_maker, upc_engine = db_connect(upc_db)
    upc_session = upc_session_maker()

    tid = get_tid('fullimageurl', upc_session)

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)

            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recipeOBJ.addMissionJson(archive, 'reduced')

            infile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Binput.cub'
            outfile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Boutput.cub'
            status = 'success'
            # Note: this loop variable shadows the queue `item` above
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    logger.error("Error processing %s", inputfile)
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        label = pvl.load(infile)
                        bands = PDSinfoDICT[archive]['bandorder']
                        query_bands = label['IsisCube']['BandBin'][
                            PDSinfoDICT[archive]['bandbinQuery']]
                        # Create a set from the list / single value
                        try:
                            query_band_set = set(query_bands)
                        except TypeError:
                            query_band_set = set([query_bands])

                        # Iterate through 'bands' and grab the first value that
                        # is present in the set defined by 'bandbinquery' --
                        # if not present, default to 1
                        exband = next(
                            (band for band in bands if band in query_band_set), 1)
                        band_infile = infile + '+' + str(exband)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)
                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions']['Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions']['Samples']
                        Nline = int(Nline)
                        Nsample = int(Nsample)
                        Sfactor = scaleFactor(Nline, Nsample, recip_json)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'isis2std':
                        final_outfile = finalpath + '/' + os.path.splitext(
                            os.path.basename(inputfile))[0] + '.browse.jpg'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', final_outfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                isisSerial = getISISid(infile)
                        except ProcessError as e:
                            print(e)
                            logger.error('Process %s :: Error', k)
                            status = 'error'

            if status == 'success':
                DB_addURL(upc_session, isisSerial, final_outfile, tid)
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
                AddProcessDB(pds_session, fid, 't')
        else:
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during browse processing')
            logger.error('File %s Not Found', inputfile)

    upc_session.close()
    pds_session.close()
    upc_engine.dispose()
    pds_engine.dispose()
def tables():
    _, engine = db_connect('upc_test')
    return engine.table_names()
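
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# on newer SQLAlchemy the inspector is the equivalent. A sketch of the same
# fixture using that API:
from sqlalchemy import inspect

def tables():
    _, engine = db_connect('upc_test')
    return inspect(engine).get_table_names()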
def main(user_args):
    log_level = user_args.log_level
    override = user_args.override

    logger = logging.getLogger('Ingest_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    print("Log File: {}Ingest.log".format(pds_log))
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Ingest Process")
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})
    RQ_work = RedisQueue('Ingest_WorkQueue')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception:
        logger.error('Database Connection: Error')
        return 1

    index = 1

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        archive = item[1]
        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during ingest processing')
            logger.warning("%s is not a file\n", inputfile)
            continue

        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()
        runflag = False
        if QOBJ is None or filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile,' flag for upc
            try:
                upcflag = all(req in inputfile
                              for req in PDSinfoDICT[archive]['upc_reqs'])
            except KeyError:
                logger.warning("No upc_reqs found for %s\nSetting upc "
                               "eligibility False for all related files.",
                               str(archive))
                upcflag = False

            filesize = os.path.getsize(inputfile)

            try:
                ingest_entry = Files()

                if QOBJ is not None and override:
                    ingest_entry.fileid = QOBJ.fileid

                ingest_entry.archiveid = PDSinfoDICT[archive]['archiveid']
                ingest_entry.filename = subfile
                ingest_entry.entry_date = date
                ingest_entry.checksum = filechecksum
                ingest_entry.upc_required = upcflag
                ingest_entry.validation_required = True
                ingest_entry.header_only = False
                ingest_entry.release_date = date
                ingest_entry.file_url = fileURL
                ingest_entry.file_size = filesize
                ingest_entry.di_pass = True
                ingest_entry.di_date = date

                session.merge(ingest_entry)
                session.flush()
                RQ_work.QueueRemove(inputfile)
                index = index + 1
            except Exception as e:
                logger.error("Error During File Insert %s : %s",
                             str(subfile), str(e))
        elif not runflag and not override:
            RQ_work.QueueRemove(inputfile)
            logger.warning("Not running ingest: file %s already present"
                           " in database and no override flag supplied",
                           inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception as e:
                session.rollback()
                logger.warning("Unable to commit to database: %s", str(e))
    else:
        # while/else: runs once the queue drains (the loop never breaks)
        logger.info("No Files Found in Ingest Queue")

    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except Exception as e:
        logger.error("Unable to commit to database: %s", str(e))
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
logger.info("Process Complete All Queues Empty") elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0: logger.warning("Process Done Work Queue NOT Empty Contains %s Files", str(RQ_work.QueueSize())) logger.info("Ingest Complete")