def get_matching_files(self):
    """ Gets the files matching the processing parameters.

    Returns
    -------
    results : sqlalchemy.orm.query.Query
        A collection of files represented as a sqlalchemy query object.
    """
    with session_scope(self.session_maker) as session:
        # Files are considered stale when their last data-integrity check is
        # more than 30 days old. The strftime/strptime round trip drops the
        # timezone and sub-second precision before the comparison.
        td = (datetime.datetime.now(pytz.utc) - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(td, "%Y-%m-%d %H:%M:%S")

        if self.volume:
            volstr = '%' + self.volume + '%'
            results = session.query(Files).filter(
                Files.archiveid == self.archive_id,
                Files.filename.like(volstr)).filter(
                    or_(
                        cast(Files.di_date, Date) < testing_date,
                        cast(Files.di_date, Date).is_(None)))
        else:
            results = session.query(Files).filter(
                Files.archiveid == self.archive_id).filter(
                    or_(
                        cast(Files.di_date, Date) < testing_date,
                        cast(Files.di_date, Date).is_(None)))
        return results
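# Hypothetical usage sketch (not in the original source): get_matching_files is
# assumed to be a method on a process object exposing session_maker, archive_id,
# and volume, as the method above expects.
def example_list_stale_files(di_process):
    # Iterate the returned query to inspect the files flagged for a new DI check.
    for file_record in di_process.get_matching_files():
        print(file_record.filename, file_record.di_date)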
def main(user_args):
    pds_id = user_args.pdsid
    delete_from_di = user_args.di
    delete_from_upc = user_args.upc

    if delete_from_di:
        pds_session_maker, pds_engine = db_connect(pds_db)
        with session_scope(pds_session_maker) as session:
            query_res = session.query(Files).filter(
                Files.filename.contains(pds_id))
            num_pds_queries = len(list(query_res))

            # Require explicit confirmation before deleting di records
            while True:
                print(f'You will be deleting {num_pds_queries} records from the di '
                      f"database {credentials[pds_db]['db']}")
                user_answer = input('Are you sure?[Y/N]:')
                if user_answer == 'Y' or user_answer == 'N':
                    break
                else:
                    print(f'Invalid input: {user_answer}')

            if user_answer == 'Y':
                for record in query_res:
                    session.delete(record)

    if delete_from_upc:
        upc_session_maker, upc_engine = db_connect(upc_db)
        with session_scope(upc_session_maker) as session:
            query_res = session.query(DataFiles).filter(
                DataFiles.productid == pds_id)
            num_upc_queries = len(list(query_res))

            # Require explicit confirmation before deleting upc records
            while True:
                print(f'You will be deleting {num_upc_queries} records from the upc '
                      f"database {credentials[upc_db]['db']}")
                user_answer = input('Are you sure?[Y/N]:')
                if user_answer == 'Y' or user_answer == 'N':
                    break
                else:
                    print(f'Invalid input: {user_answer}')

            if user_answer == 'Y':
                for record in query_res:
                    session.delete(record)
def add_url(input_file, upc_id, session_maker):
    # Map the derived product path on disk to its public URL
    outputfile = input_file.replace(derived_base, derived_url)
    thumb = outputfile + '.thumbnail.jpg'
    browse = outputfile + '.browse.jpg'

    with session_scope(session_maker) as session:
        q_record = session.query(JsonKeywords).filter(JsonKeywords.upcid == upc_id)

        # Add the browse and thumbnail URLs to the existing jsonkeywords blob
        params = {}
        old_json = q_record.first().jsonkeywords
        old_json['browse'] = browse
        old_json['thumbnail'] = thumb
        params['jsonkeywords'] = old_json

        q_record.update(params, synchronize_session=False)
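# Hypothetical usage sketch for add_url (not in the original source): assumes a
# derived product path under derived_base and an existing JsonKeywords row for
# the given upcid, as add_url expects.
def example_add_derived_urls(session_maker):
    derived_product = derived_base + '/some/archive/product_name'  # assumed example path
    add_url(derived_product, 12345, session_maker)  # 12345 is a placeholder upcid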
def main():
    Session, _ = db_connect(upc_db)
    path = summaries_path

    with session_scope(Session) as session:
        print("Creating Hist Table")
        session.execute(query)
        histogram_qobj = session.query("histogram_summary")
        total_rows = session.execute(
            "SELECT count(*) FROM histogram_summary;").first()[0]

        page_number = 0
        number_of_rows_per_page = 200000
        complete_json_output = []

        print("Paging hist results")
        while True:
            lower_bound = page_number * number_of_rows_per_page
            upper_bound = (page_number * number_of_rows_per_page) + number_of_rows_per_page

            # The final page is usually smaller than a full page, so shrink
            # the LIMIT to cover only the remaining rows.
            if upper_bound > total_rows:
                number_of_rows_per_page = total_rows - lower_bound

            json_query = "with t AS (SELECT * FROM histogram_summary LIMIT {} OFFSET {}) SELECT json_agg(t) FROM t;".format(
                number_of_rows_per_page, lower_bound)
            output = session.execute(json_query).fetchall()
            complete_json_output.extend([dict(line) for line in output])
            page_number += 1

            if upper_bound > total_rows:
                break

    print("Finished view generation")

    print("Writing Json")
    json_output = json.dumps(complete_json_output)
    with open(path + "histogram_summary.json", "a") as json_file:
        json_file.write(json_output)
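# Worked example of the paging above (values assumed for illustration): with
# total_rows = 450000 and 200000 rows per page, the loop issues
#   LIMIT 200000 OFFSET 0
#   LIMIT 200000 OFFSET 200000
#   LIMIT 50000  OFFSET 400000
# shrinking the final LIMIT so the last partial page covers only the remaining
# rows before the loop breaks.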
def get_matching_files(self):
    """ Gets the files matching the processing parameters.

    Returns
    -------
    results : sqlalchemy.orm.query.Query
        A collection of files represented as a sqlalchemy query object.
    """
    with session_scope(self.session_maker) as session:
        if self.volume:
            volstr = '%' + self.volume + '%'
            results = session.query(Files).filter(
                Files.archiveid == self.archive_id,
                Files.filename.like(volstr),
                Files.upc_required == 't')
        else:
            results = session.query(Files).filter(
                Files.archiveid == self.archive_id,
                Files.upc_required == 't')

        if self.search:
            qf = '%' + self.search + '%'
            results = results.filter(Files.filename.like(qf))

        return results
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)
    persist = user_args.persist
    log_level = user_args.log_level
    namespace = user_args.namespace

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        # Not running under a Slurm array job
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id': slurm_array_id, 'inputfile': inputfile}

    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)

    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_UpdateQueue', namespace)
    RQ_work = RedisQueue('UPC_UpdateWorkQueue', namespace)
    logger.info("UPC Update Queue: %s", RQ_main.id_name)
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # While there are items in the redis queue and the queue is unlocked
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # Get a file from the queue and move it to the work queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        # Each item is a stringified (inputfile, archive, failing_command, update_type) tuple
        item_list = literal_eval(item)
        inputfile = item_list[0]
        archive = item_list[1]
        failing_command = item_list[2]
        update_type = item_list[3]
        upc_id = None

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC update')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source
        edr_source = inputfile.replace(workarea, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        # Verify that a database session can be created before doing any work
        try:
            session = upc_session_maker()
            session.close()
        except TypeError as e:
            logger.error("Unable to create a database session/connection to the upc database: %s", e)
            raise e

        try:
            if update_type.lower() == 'upc':
                recipe_file = recipe_base + "/" + archive + '.json'
                no_extension_inputfile = os.path.splitext(inputfile)[0]
                cam_info_file = no_extension_inputfile + '_caminfo.pvl'
                footprint_file = no_extension_inputfile + '_footprint.json'
                catlab_output = no_extension_inputfile + '_catlab.pvl'

                with open(recipe_file) as fp:
                    upc_json = json.load(fp)['upc']

                # Attempt to get the optional search_term_mapping for the upc
                # process
                try:
                    search_term_mapping = upc_json['search_term_mapping']
                except KeyError:
                    search_term_mapping = {}

                # Some datasets with attached PDS labels cause PVL to hang,
                # so the recipe includes a call to dump the label using `catlab`.
                # If present, use the catlab output as pds_label instead of inputfile.
                if os.path.exists(catlab_output):
                    pds_label = pvl.load(catlab_output)
                else:
                    pds_label = pvl.load(inputfile)

                instrument_name = get_instrument_name(pds_label)
                spacecraft_name = get_spacecraft_name(pds_label)
                target_name = get_target_name(pds_label)

                with session_scope(upc_session_maker) as session:
                    target_qobj = Targets.create(session, targetname=target_name,
                                                 displayname=target_name.title(),
                                                 system=target_name)
                    target_id = target_qobj.targetid

                with session_scope(upc_session_maker) as session:
                    instrument_qobj = Instruments.create(session, instrument=instrument_name,
                                                         spacecraft=spacecraft_name)
                    instrument_id = instrument_qobj.instrumentid

                ######## Generate DataFiles Record ########
                datafile_attributes = create_datafiles_atts(pds_label, edr_source,
                                                            no_extension_inputfile + '.cub')

                datafile_attributes['instrumentid'] = instrument_id
                datafile_attributes['targetid'] = target_id

                with session_scope(upc_session_maker) as session:
                    datafile_qobj = DataFiles.create(session, **datafile_attributes)
                    upc_id = datafile_qobj.upcid

                ######## Generate SearchTerms Record ########
                search_term_attributes = create_search_terms_atts(cam_info_file, upc_id,
                                                                  no_extension_inputfile + '.cub',
                                                                  footprint_file, search_term_mapping)

                search_term_attributes['targetid'] = target_id
                search_term_attributes['instrumentid'] = instrument_id

                with session_scope(upc_session_maker) as session:
                    SearchTerms.create(session, **search_term_attributes)

                ######## Generate JsonKeywords Record ########
                json_keywords_attributes = create_json_keywords_atts(cam_info_file, upc_id,
                                                                     inputfile, failing_command,
                                                                     logger)

                with session_scope(upc_session_maker) as session:
                    JsonKeywords.create(session, **json_keywords_attributes)

            # Derived Processing:

            # If we don't have a upcid, get the matching ID from the database
            if not upc_id:
                with session_scope(upc_session_maker) as session:
                    src = inputfile.replace(workarea, web_base)
                    datafile = session.query(DataFiles).filter(or_(DataFiles.source == src,
                                                                   DataFiles.detached_label == src)).first()
                    if not datafile:
                        RQ_error.QueueAdd(f'No matching upcid was found for {inputfile}, '
                                          'derived product paths could not be added')
                        logger.warning('No matching upcid was found for %s, '
                                       'derived product paths could not be added', inputfile)
                    upc_id = datafile.upcid

            final_path = makedir(inputfile)
            src = os.path.splitext(inputfile)[0]
            derived_product = os.path.join(final_path, os.path.splitext(os.path.basename(inputfile))[0])

            # If derived products exist, copy them to the derived area and add the path to the db
            try:
                shutil.move(src + '.browse.jpg', derived_product + '.browse.jpg')
                shutil.move(src + '.thumbnail.jpg', derived_product + '.thumbnail.jpg')
                add_url(derived_product, upc_id, upc_session_maker)
            except FileNotFoundError:
                RQ_error.QueueAdd(f'Unable to locate or access derived products for {inputfile}')
                logger.warning('Unable to locate or access derived products for %s', inputfile)

            if not persist:
                # Remove all files from the workarea except for the copied
                # source file
                file_prefix = os.path.splitext(inputfile)[0]
                workarea_files = glob(file_prefix + '*')
                # os.remove(os.path.join(workarea, 'print.prt'))
                for file in workarea_files:
                    os.remove(file)

        # Handle SQL specific database errors
        except SQLAlchemyError as e:
            logger.error("Database operation failed: %s \nRequeueing (%s, %s)", e, inputfile, archive)
            RQ_main.QueueAdd((inputfile, archive, failing_command, update_type))
            raise e

        RQ_work.QueueRemove(item)

    # Disconnect from the engines
    upc_engine.dispose()
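# Hypothetical producer-side sketch (not part of the original pipeline): the
# consumer above literal_eval's each queue item into a 4-tuple of
# (inputfile, archive, failing_command, update_type), so an enqueueing script
# is assumed to add items in that shape, mirroring the requeue call in the
# SQLAlchemyError handler.
def example_enqueue_for_update(queue, inputfile, archive, failing_command='', update_type='UPC'):
    # queue is assumed to be a RedisQueue; QueueAdd is used exactly as in the
    # error handler above.
    queue.QueueAdd((inputfile, archive, failing_command, update_type))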