def main():
    RQ = RedisQueue('LinkQueue')
    while int(RQ.QueueSize()) > 0:
        # Grab a tuple of values from the redis queue
        item = literal_eval(RQ.QueueGet().decode('utf-8'))
        # Split tuple into two values
        inputfile = item[0]
        archive = item[1]

        json_file_path = recipe_base + archive + '.json'
        try:
            with open(json_file_path, 'r') as f:
                json_dict = json.load(f)
        except ValueError:
            continue
        link_src_path = json_dict['src']

        voldesc = load_pvl(inputfile)
        dataset_id = voldesc['VOLUME']['DATA_SET_ID']
        volume_id = voldesc['VOLUME']['VOLUME_ID']

        # If more than one dataset id exists, link each of them
        if isinstance(dataset_id, (list, tuple, set)):
            for x in dataset_id:
                link(link_src_path, link_dest, volume_id, x)
        else:
            # Not a container type
            link(link_src_path, link_dest, volume_id, dataset_id)

def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('FinalJobber')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Service.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    # *************** Look at Final queue for work ***************
    RQ_final = RedisQueue('FinalQueue')
    logger.info("Redis Queue: %s", RQ_final.id_name)
    if int(RQ_final.QueueSize()) == 0:
        logger.info('Nothing Found in Final Queue')
    else:
        # Queue entries are stored as bytes; decode before string handling
        FKey = RQ_final.QueueGet().decode('utf-8')
        logger.info('Found %s in Final Queue', FKey)

        # *************** HPC job stuff ***************
        logger.info('HPC Cluster job Submission Starting')
        jobOBJ = HPCjob()
        jobOBJ.setJobName(FKey + '_Final')
        jobOBJ.setStdOut(slurm_log + FKey + '_%A_%a.out')
        jobOBJ.setStdError(slurm_log + FKey + '_%A_%a.err')
        jobOBJ.setWallClock('24:00:00')
        jobOBJ.setMemory('8192')
        jobOBJ.setPartition('pds')

        cmd = cmd_dir + 'ServiceFinal.py ' + FKey
        jobOBJ.setCommand(cmd)
        logger.info('HPC Command: %s', cmd)

        SBfile = scratch + FKey + '/' + FKey + '_final.sbatch'
        jobOBJ.MakeJobFile(SBfile)

        try:
            sb = open(SBfile)
            sb.close()
            logger.info('SBATCH File Creation: Success')
        except IOError:
            logger.error('SBATCH File %s Not Found', SBfile)

        try:
            jobOBJ.Run()
            logger.info('Job Submission to HPC: Success')
        except IOError as e:
            logger.error('Jobs NOT Submitted to HPC\n%s', e)

def main():
    RQ = RedisQueue('LinkQueue')

    args = Args()
    args.parse_args()

    logger = logging.getLogger('LINK_Process')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Link.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    while int(RQ.QueueSize()) > 0:
        # Grab a tuple of values from the redis queue
        item = literal_eval(RQ.QueueGet().decode('utf-8'))
        # Split tuple into two values
        inputfile = item[0]
        archive = item[1]

        json_file_path = recipe_base + archive + '.json'
        try:
            with open(json_file_path, 'r') as f:
                json_dict = json.load(f)
        except ValueError as e:
            logger.warning(e)
            continue
        link_src_path = json_dict['src']

        voldesc = load_pvl(inputfile)
        dataset_id = voldesc['VOLUME']['DATA_SET_ID']
        volume_id = voldesc['VOLUME']['VOLUME_ID']

        # If more than one dataset id exists, link each of them
        if isinstance(dataset_id, (list, tuple, set)):
            for x in dataset_id:
                link(link_src_path, link_dest, volume_id, x)
        else:
            # Not a container type
            link(link_src_path, link_dest, volume_id, dataset_id)

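# The Link consumer above expects each queue entry to be the repr() of a Python
# tuple, pushed as a string and parsed back with ast.literal_eval. A minimal,
# self-contained sketch of that round-trip; the path and archive name are made
# up for illustration, and the byte string stands in for what RedisQueue's
# QueueAdd/QueueGet would store and return.
from ast import literal_eval


def _queue_roundtrip_example():
    item = ('/path/to/example.img', 'example_archive')       # (inputfile, archive)
    wire = str(item).encode('utf-8')                          # as pushed onto the queue
    inputfile, archive = literal_eval(wire.decode('utf-8'))   # as consumed above
    return inputfile, archive
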
def main():
    args = Args()
    args.parse_args()
    override = args.override

    logger = logging.getLogger('Ingest_Process')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    print("Log File: {}Ingest.log".format(pds_log))
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)
    logger.info("Starting Ingest Process")

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})
    RQ_work = RedisQueue('Ingest_WorkQueue')

    RQ_upc = RedisQueue('UPC_ReadyQueue')
    RQ_thumb = RedisQueue('Thumbnail_ReadyQueue')
    RQ_browse = RedisQueue('Browse_ReadyQueue')
    logger.info("UPC Queue: %s", RQ_upc.id_name)
    logger.info("Thumbnail Queue: %s", RQ_thumb.id_name)
    logger.info("Browse Queue: %s", RQ_browse.id_name)

    try:
        session, engine = db_connect(pds_db)
        logger.info('DataBase Connection: Success')
    except Exception as e:
        logger.error('DataBase Connection: Error\n%s', str(e))
        return 1

    index = 1

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None or filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile', flag for upc
            upcflag = all(x in inputfile
                          for x in PDSinfoDICT[archive]['upc_reqs'])
            filesize = os.path.getsize(inputfile)

            try:
                # If we found an existing file and want to overwrite the data
                if QOBJ is not None and override:
                    ingest_entry = QOBJ
                # If the file was not found, create a new entry
                else:
                    ingest_entry = Files()

                ingest_entry.archiveid = PDSinfoDICT[archive]['archiveid']
                ingest_entry.filename = subfile
                ingest_entry.entry_date = date
                ingest_entry.checksum = filechecksum
                ingest_entry.upc_required = upcflag
                ingest_entry.validation_required = True
                ingest_entry.header_only = False
                ingest_entry.release_date = date
                ingest_entry.file_url = fileURL
                ingest_entry.file_size = filesize
                ingest_entry.di_pass = True
                ingest_entry.di_date = date

                session.merge(ingest_entry)
                session.flush()

                if upcflag:
                    RQ_upc.QueueAdd((inputfile, ingest_entry.fileid, archive))
                    RQ_thumb.QueueAdd(
                        (inputfile, ingest_entry.fileid, archive))
                    RQ_browse.QueueAdd(
                        (inputfile, ingest_entry.fileid, archive))

                RQ_work.QueueRemove(inputfile)
                index = index + 1
            except Exception as e:
                logger.error("Error During File Insert %s : %s",
                             str(subfile), str(e))
        elif not runflag and not override:
            RQ_work.QueueRemove(inputfile)
            logger.warning(
                "Not running ingest: file %s already present"
                " in database and no override flag supplied", inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception as e:
                session.rollback()
                logger.warning("Unable to commit to database: %s", str(e))
    else:
        logger.info("No Files Found in Ingest Queue")

    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except Exception as e:
        logger.error("Unable to commit to database: %s", str(e))
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

    if int(RQ_main.QueueSize()) == 0 and int(RQ_work.QueueSize()) == 0:
        logger.info("Process Complete All Queues Empty")
    elif int(RQ_main.QueueSize()) == 0 and int(RQ_work.QueueSize()) != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")

def main():
    # Connect to the PDS and UPC databases; keep the engines so the
    # connections can be disposed of at the end of the run
    pds_session, pds_engine = db_connect(pds_db)
    session, upc_engine = db_connect(upc_db)

    # ***************** Set up logging *****************
    logger = logging.getLogger('UPC_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue')
    logger.info("UPC Processing Queue: %s", RQ_main.id_name)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    proc_date_tid = get_tid('processdate', session)
    err_type_tid = get_tid('errortype', session)
    err_msg_tid = get_tid('errormessage', session)
    err_flag_tid = get_tid('error', session)
    isis_footprint_tid = get_tid('isisfootprint', session)
    isis_centroid_tid = get_tid('isiscentroid', session)
    start_time_tid = get_tid('starttime', session)
    stop_time_tid = get_tid('stoptime', session)
    checksum_tid = get_tid('checksum', session)

    # While there are items in the Redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # Get a file from the queue
        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        fid = item[1]
        archive = item[2]

        if not os.path.isfile(inputfile):
            print("{} is not a file\n".format(inputfile))

        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)

            # @TODO refactor this logic. We're using an object to find a path,
            # returning it, then passing it back to the object so that the
            # object can use it.
            recipeOBJ = Recipe()
            recipe_json = recipeOBJ.getRecipeJSON(archive)
            recipeOBJ.AddJsonFile(recipe_json, 'upc')
            infile = workarea + os.path.splitext(
                str(os.path.basename(inputfile)))[0] + '.UPCinput.cub'
            outfile = workarea + os.path.splitext(
                str(os.path.basename(inputfile)))[0] + '.UPCoutput.cub'
            caminfoOUT = workarea + os.path.splitext(
                str(os.path.basename(inputfile)))[0] + '_caminfo.pvl'
            EDRsource = inputfile.replace(
                '/pds_san/PDS_Archive/',
                'https://pdsimage.wr.usgs.gov/Missions/')

            status = 'success'
            # Iterate through each process listed in the recipe
            for item in recipeOBJ.getProcesses():
                # If any of the processes failed, discontinue processing
                if status.lower() == 'error':
                    break
                elif status.lower() == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    # Handle processing based on string description.
                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'thmproc':
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                        thmproc_odd = str(workarea) + str(
                            os.path.splitext(os.path.basename(inputfile))
                            [0]) + '.UPCoutput.raw.odd.cub'
                        thmproc_even = str(workarea) + str(
                            os.path.splitext(os.path.basename(inputfile))
                            [0]) + '.UPCoutput.raw.even.cub'
                    elif item == 'handmos':
                        processOBJ.updateParameter('from_', thmproc_even)
                        processOBJ.updateParameter('mosaic', thmproc_odd)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        band_infile = infile + '+' + str(1)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'footprintinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'caminfo':
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', caminfoOUT)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    pwd = os.getcwd()
                    # Iterate through functions listed in the process object
                    for k, v in processOBJ.getProcess().items():
                        # Load a function into func
                        func = getattr(isis, k)
                        try:
                            os.chdir(workarea)
                            # Execute function
                            func(**v)
                            os.chdir(pwd)
                            if item == 'handmos':
                                if os.path.isfile(thmproc_odd):
                                    os.rename(thmproc_odd, infile)
                            else:
                                if os.path.isfile(outfile):
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                label = pvl.load(infile)
                                infile_bandlist = label['IsisCube']['BandBin'][
                                    PDSinfoDICT[archive]['bandbinQuery']]
                                infile_centerlist = label['IsisCube'][
                                    'BandBin']['Center']
                            elif item == 'thmproc':
                                pass
                            elif item == 'handmos':
                                label = pvl.load(infile)
                                infile_bandlist = label['IsisCube']['BandBin'][
                                    PDSinfoDICT[archive]['bandbinQuery']]
                                infile_centerlist = label['IsisCube'][
                                    'BandBin']['Center']
                        except ProcessError as e:
                            print(e)
                            status = 'error'
                            processError = item

            # Keyword definitions
            keywordsOBJ = None
            if status.lower() == 'success':
                try:
                    keywordsOBJ = UPCkeywords(caminfoOUT)
                except:
                    # Clean up characters that break the PVL parser, then retry
                    with open(caminfoOUT, 'r') as f:
                        filedata = f.read()

                    filedata = filedata.replace(';', '-').replace('&', '-')
                    filedata = re.sub(r'\-\s+', r'', filedata, flags=re.M)

                    with open(caminfoOUT, 'w') as f:
                        f.write(filedata)

                    keywordsOBJ = UPCkeywords(caminfoOUT)

                target_Qobj = session.query(upc_models.Targets).filter(
                    upc_models.Targets.targetname == keywordsOBJ.getKeyword(
                        'TargetName').upper()).first()
                instrument_Qobj = session.query(upc_models.Instruments).filter(
                    upc_models.Instruments.instrument ==
                    keywordsOBJ.getKeyword('InstrumentId')).first()

                if session.query(upc_models.DataFiles).filter(
                        upc_models.DataFiles.isisid == keywordsOBJ.getKeyword(
                            'IsisId')).first() is None:
                    test_input = upc_models.DataFiles(
                        isisid=keywordsOBJ.getKeyword('IsisId'),
                        productid=keywordsOBJ.getKeyword('ProductId'),
                        edr_source=EDRsource,
                        edr_detached_label='',
                        instrumentid=instrument_Qobj.instrumentid,
                        targetid=target_Qobj.targetid)
                    session.merge(test_input)
                    session.commit()

                Qobj = session.query(upc_models.DataFiles).filter(
                    upc_models.DataFiles.isisid == keywordsOBJ.getKeyword(
                        'IsisId')).first()
                UPCid = Qobj.upcid
                print(UPCid)

                # Block to add band information to meta_bands
                if isinstance(infile_bandlist, list):
                    index = 0
                    while index < len(infile_bandlist):
                        B_DBinput = upc_models.MetaBands(
                            upcid=UPCid,
                            filter=str(infile_bandlist[index]),
                            centerwave=infile_centerlist[index])
                        session.merge(B_DBinput)
                        index = index + 1
                else:
                    try:
                        # If infile_centerlist is in "Units" format, grab the value
                        f_centerlist = float(infile_centerlist[0])
                    except TypeError:
                        f_centerlist = float(infile_centerlist)
                    B_DBinput = upc_models.MetaBands(upcid=UPCid,
                                                     filter=infile_bandlist,
                                                     centerwave=f_centerlist)
                    session.merge(B_DBinput)
                session.commit()

                # Block to add common keywords
                testjson = json.load(open(keyword_def, 'r'))
                for element_1 in testjson['instrument']['COMMON']:
                    keyvalue = ""
                    keytype = testjson['instrument']['COMMON'][element_1]['type']
                    keyword = testjson['instrument']['COMMON'][element_1]['keyword']

                    keyword_Qobj = session.query(upc_models.Keywords).filter(
                        and_(upc_models.Keywords.typename == element_1,
                             upc_models.Keywords.instrumentid == 1)).first()

                    if keyword_Qobj is None:
                        continue
                    else:
                        keyvalue = keywordsOBJ.getKeyword(keyword)
                    if keyvalue is None:
                        continue
                    keyvalue = db2py(keytype, keyvalue)
                    try:
                        DBinput = upc_models.create_table(
                            keytype,
                            upcid=UPCid,
                            typeid=keyword_Qobj.typeid,
                            value=keyvalue)
                    except Exception as e:
                        logger.warning("Unable to enter %s into table\n\n%s",
                                       keytype, e)
                        continue
                    session.merge(DBinput)
                    try:
                        session.flush()
                    except:
                        logger.warning("Unable to flush database connection")
                session.commit()

                # Block to add archive-specific keywords
                for element_1 in testjson['instrument'][archive]:
                    keyvalue = ""
                    keytype = testjson['instrument'][archive][element_1]['type']
                    keyword = testjson['instrument'][archive][element_1]['keyword']

                    keyword_Qobj = session.query(upc_models.Keywords).filter(
                        and_(
                            upc_models.Keywords.typename == element_1,
                            upc_models.Keywords.instrumentid.in_(
                                (1, instrument_Qobj.instrumentid)))).first()

                    if keyword_Qobj is None:
                        continue
                    else:
                        keyvalue = keywordsOBJ.getKeyword(keyword)
                    if keyvalue is None:
                        logger.debug("Keyword %s not found", keyword)
                        continue
                    keyvalue = db2py(keytype, keyvalue)
                    try:
                        DBinput = upc_models.create_table(
                            keytype,
                            upcid=UPCid,
                            typeid=keyword_Qobj.typeid,
                            value=keyvalue)
                    except Exception as e:
                        logger.warning("Unable to enter %s into database\n\n%s",
                                       keytype, e)
                        continue
                    session.merge(DBinput)
                    try:
                        session.flush()
                    except:
                        logger.warning("Unable to flush database connection")
                session.commit()

                # Geometry keywords (centroid and footprint)
                G_centroid = 'point ({} {})'.format(
                    str(keywordsOBJ.getKeyword('CentroidLongitude')),
                    str(keywordsOBJ.getKeyword('CentroidLatitude')))

                G_keyword_Qobj = session.query(
                    upc_models.Keywords.typeid).filter(
                        upc_models.Keywords.typename == 'isiscentroid').first()
                G_footprint_Qobj = session.query(
                    upc_models.Keywords.typeid).filter(
                        upc_models.Keywords.typename == 'isisfootprint').first()
                G_footprint = keywordsOBJ.getKeyword('GisFootprint')

                G_DBinput = upc_models.MetaGeometry(upcid=UPCid,
                                                    typeid=G_keyword_Qobj,
                                                    value=G_centroid)
                session.merge(G_DBinput)
                G_DBinput = upc_models.MetaGeometry(upcid=UPCid,
                                                    typeid=G_footprint_Qobj,
                                                    value=G_footprint)
                session.merge(G_DBinput)
                session.flush()
                session.commit()

                # Checksum of the source product, stored as a string keyword
                f_hash = hashlib.md5()
                with open(inputfile, "rb") as f:
                    for chunk in iter(lambda: f.read(4096), b""):
                        f_hash.update(chunk)
                checksum = f_hash.hexdigest()

                DBinput = upc_models.MetaString(upcid=UPCid,
                                                typeid=checksum_tid,
                                                value=checksum)
                session.merge(DBinput)
                DBinput = upc_models.MetaBoolean(upcid=UPCid,
                                                 typeid=err_flag_tid,
                                                 value=False)
                session.merge(DBinput)
                session.commit()
                AddProcessDB(pds_session, fid, True)
                os.remove(infile)
                os.remove(caminfoOUT)

            elif status.lower() == 'error':
                try:
                    label = pvl.load(infile)
                except Exception as e:
                    logger.info('%s', e)
                    continue
                date = datetime.datetime.now(
                    pytz.utc).strftime("%Y-%m-%d %H:%M:%S")

                if '2isis' in processError or processError == 'thmproc':
                    if session.query(upc_models.DataFiles).filter(
                            upc_models.DataFiles.edr_source ==
                            EDRsource).first() is None:
                        target_Qobj = session.query(upc_models.Targets).filter(
                            upc_models.Targets.targetname == str(
                                label['IsisCube']['Instrument']
                                ['TargetName']).upper()).first()
                        instrument_Qobj = session.query(
                            upc_models.Instruments).filter(
                                upc_models.Instruments.instrument == str(
                                    label['IsisCube']['Instrument']
                                    ['InstrumentId'])).first()

                        error1_input = upc_models.DataFiles(
                            isisid='1', edr_source=EDRsource)
                        session.merge(error1_input)
                        session.commit()

                    EQ1obj = session.query(upc_models.DataFiles).filter(
                        upc_models.DataFiles.edr_source == EDRsource).first()
                    UPCid = EQ1obj.upcid

                    errorMSG = 'Error running {} on file {}'.format(
                        processError, inputfile)

                    DBinput = MetaTime(upcid=UPCid,
                                       typeid=proc_date_tid,
                                       value=date)
                    session.merge(DBinput)
                    DBinput = MetaString(upcid=UPCid,
                                         typeid=err_type_tid,
                                         value=processError)
                    session.merge(DBinput)
                    DBinput = MetaString(upcid=UPCid,
                                         typeid=err_msg_tid,
                                         value=errorMSG)
                    session.merge(DBinput)
                    DBinput = MetaBoolean(upcid=UPCid,
                                          typeid=err_flag_tid,
                                          value=True)
                    session.merge(DBinput)
                    DBinput = MetaGeometry(upcid=UPCid,
                                           typeid=isis_footprint_tid,
                                           value='POINT(361 0)')
                    session.merge(DBinput)
                    DBinput = MetaGeometry(upcid=UPCid,
                                           typeid=isis_centroid_tid,
                                           value='POINT(361 0)')
                    session.merge(DBinput)
                    session.commit()
                else:
                    try:
                        label = pvl.load(infile)
                    except Exception as e:
                        logger.warning('%s', e)
                        continue
                    isisSerial = getISISid(infile)

                    if session.query(upc_models.DataFiles).filter(
                            upc_models.DataFiles.isisid ==
                            isisSerial).first() is None:
                        target_Qobj = session.query(upc_models.Targets).filter(
                            upc_models.Targets.targetname == str(
                                label['IsisCube']['Instrument']
                                ['TargetName']).upper()).first()
                        instrument_Qobj = session.query(
                            upc_models.Instruments).filter(
                                upc_models.Instruments.instrument == str(
                                    label['IsisCube']['Instrument']
                                    ['InstrumentId'])).first()
                        if target_Qobj is None or instrument_Qobj is None:
                            continue
                        error2_input = upc_models.DataFiles(
                            isisid=isisSerial,
                            productid=label['IsisCube']['Archive']['ProductId'],
                            edr_source=EDRsource,
                            instrumentid=instrument_Qobj.instrumentid,
                            targetid=target_Qobj.targetid)
                        session.merge(error2_input)
                        session.commit()

                    try:
                        EQ2obj = session.query(upc_models.DataFiles).filter(
                            upc_models.DataFiles.isisid == isisSerial).first()
                        UPCid = EQ2obj.upcid
                        errorMSG = 'Error running {} on file {}'.format(
                            processError, inputfile)

                        DBinput = MetaTime(upcid=UPCid,
                                           typeid=proc_date_tid,
                                           value=date)
                        session.merge(DBinput)
                        DBinput = MetaString(upcid=UPCid,
                                             typeid=err_type_tid,
                                             value=processError)
                        session.merge(DBinput)
                        DBinput = MetaString(upcid=UPCid,
                                             typeid=err_msg_tid,
                                             value=errorMSG)
                        session.merge(DBinput)
                        DBinput = MetaBoolean(upcid=UPCid,
                                              typeid=err_flag_tid,
                                              value=True)
                        session.merge(DBinput)
                        DBinput = MetaGeometry(upcid=UPCid,
                                               typeid=isis_footprint_tid,
                                               value='POINT(361 0)')
                        session.merge(DBinput)
                        DBinput = MetaGeometry(upcid=UPCid,
                                               typeid=isis_centroid_tid,
                                               value='POINT(361 0)')
                        session.merge(DBinput)
                    except:
                        pass

                    try:
                        v = label['IsisCube']['Instrument']['StartTime']
                    except KeyError:
                        v = None
                    except:
                        continue

                    try:
                        DBinput = MetaTime(upcid=UPCid,
                                           typeid=start_time_tid,
                                           value=v)
                        session.merge(DBinput)
                    except:
                        continue

                    try:
                        v = label['IsisCube']['Instrument']['StopTime']
                    except KeyError:
                        v = None
                    DBinput = MetaTime(upcid=UPCid,
                                       typeid=stop_time_tid,
                                       value=v)
                    session.merge(DBinput)

                    session.commit()

                AddProcessDB(pds_session, fid, False)
                os.remove(infile)

    # Disconnect from db sessions
    pds_session.close()
    session.close()
    # Disconnect from the engines
    pds_engine.dispose()
    upc_engine.dispose()

    logger.info("UPC processing exited successfully")

def main():
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # ********* Set up logging *************
    logger = logging.getLogger('DI_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)
    logger.info('Starting DI Process')

    try:
        # Ignores engine information
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connection: Success')
    except Exception:
        logger.error('DataBase Connection: Error')
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    index = 0

    while int(RQ.QueueSize()) > 0:
        item = literal_eval(RQ.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception:
            logger.error('Query for File: %s', inputfile)
            continue

        archive_path = PDSinfoDICT[archive]['path']
        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            if checksum == Qelement.checksum:
                Qelement.di_pass = True
            else:
                Qelement.di_pass = False
            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            logger.error('File %s Not Found', cpfile)

    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception:
        session.rollback()

def main():
    # ***************** Set up logging *****************
    logger = logging.getLogger('Browse_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('Browse_ReadyQueue')
    PDSinfoDICT = json.load(open(pds_info, 'r'))
    pds_session, _ = db_connect(pds_db)
    upc_session, _ = db_connect(upc_db)

    tid = get_tid('fullimageurl', upc_session)

    while int(RQ_main.QueueSize()) > 0:
        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        fid = item[1]
        archive = item[2]
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)

            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recip_json = recipeOBJ.getRecipeJSON(archive)
            recipeOBJ.AddJsonFile(recip_json, 'reduced')
            infile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Binput.cub'
            outfile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Boutput.cub'

            status = 'success'
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        label = pvl.load(infile)
                        bands = PDSinfoDICT[archive]['bandorder']
                        query_bands = label['IsisCube']['BandBin'][
                            PDSinfoDICT[archive]['bandbinQuery']]
                        # Create a set from the list / single value
                        try:
                            query_band_set = set(query_bands)
                        except:
                            query_band_set = set([query_bands])
                        # Iterate through 'bands' and grab the first value that
                        # is present in the set defined by 'bandbinQuery' --
                        # if not present, default to 1
                        exband = next(
                            (band for band in bands if band in query_band_set),
                            1)

                        band_infile = infile + '+' + str(exband)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)
                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions']['Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions']['Samples']
                        Nline = int(Nline)
                        Nsample = int(Nsample)
                        Sfactor = scaleFactor(Nline, Nsample, recip_json)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'isis2std':
                        final_outfile = finalpath + '/' + os.path.splitext(
                            os.path.basename(inputfile))[0] + '.browse.jpg'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', final_outfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                isisSerial = getISISid(infile)
                        except ProcessError as e:
                            print(e)
                            logger.error('Process %s :: Error', k)
                            status = 'error'

            if status == 'success':
                DB_addURL(upc_session, isisSerial, final_outfile, tid)
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
                AddProcessDB(pds_session, fid, 't')
        else:
            logger.error('File %s Not Found', inputfile)

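# The cubeatt branch above picks the first preferred band that the label
# actually contains, falling back to band 1. The same selection in isolation;
# the band numbers below are made up for illustration.
bands = [7, 5, 3]            # preferred order, as read from PDSinfoDICT 'bandorder'
query_band_set = {3, 4, 5}   # bands reported by the ISIS label's BandBin group
exband = next((band for band in bands if band in query_band_set), 1)
assert exband == 5
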
def main():
    PDSinfoDICT = json.load(open(pds_info, 'r'))
    args = Args()
    args.parse_args()

    # Set up logging
    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)
    logger.info('Starting DI Process')

    try:
        session, engine = db_connect(pds_db)
        logger.info('DataBase Connection: Success')
    except Exception as e:
        logger.error('DataBase Connection Error: %s', str(e))
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ.id_name: '1'})
    index = 0

    logger.info("DI Queue: %s", RQ.id_name)
    while int(RQ.QueueSize()) > 0 and RQ_lock.available(RQ.id_name):
        item = literal_eval(RQ.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception as e:
            logger.warning('Filename query failed for inputfile %s: %s',
                           inputfile, str(e))
            continue

        archive_path = PDSinfoDICT[archive]['path']
        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            Qelement.di_pass = checksum == Qelement.checksum
            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            logger.warning('File %s Not Found', cpfile)

    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception as e:
        logger.warning("Unable to commit changes to database\n\n%s", e)
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

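# The DI loop above batches its database writes: flush() after every record,
# commit() once roughly 50 records have accumulated, and a final commit() after
# the loop for the remainder. A stripped-down sketch of that batching pattern;
# DummySession is a stand-in for the SQLAlchemy session used above.
class DummySession:
    def flush(self):
        pass

    def commit(self):
        print("commit")

    def rollback(self):
        pass


def batched_updates(records, session, batch_size=50):
    index = 0
    for _ in records:
        # ... update the mapped object here ...
        session.flush()
        index += 1
        if index > batch_size:
            session.commit()
            index = 0
    try:
        session.commit()  # pick up whatever is left in the final partial batch
    except Exception:
        session.rollback()


batched_updates(range(120), DummySession())
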
def main():
    archiveID = {16: '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/CTX/',
                 74: '/pds_san/PDS_Archive/Lunar_Reconnaissance_Orbiter/LROC/EDR/',
                 124: '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/HiRISE/',
                 101: '/pds_san/PDS_Archive/Apollo/Rock_Sample_Images/'
                 }

    args = Args()
    args.parse_args()

    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)
    logger.info('Starting DI Process')

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connection: Success')
    except Exception:
        logger.error('DataBase Connection: Error')
        return 1

    RQ = RedisQueue('ChecksumUpdate_Queue')
    index = 0
    count = 0

    while int(RQ.QueueSize()) > 0:
        inputfile = RQ.QueueGet().decode('utf-8')
        Qelement = session.query(Files).filter(
            Files.filename == inputfile).one()
        cpfile = archiveID[Qelement.archiveid] + Qelement.filename
        if os.path.isfile(cpfile):
            # Calculate checksum in chunks of 4096
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            if checksum != Qelement.checksum:
                Qelement.checksum = checksum
                Qelement.di_pass = '******'
                Qelement.di_date = datetime.datetime.now(
                    pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
                session.flush()
                index = index + 1
                count = count + 1
                logger.info('Update Checksum %s: Success', inputfile)
                if count > 25:
                    session.commit()
                    logger.info('Session Commit for 25 Records: Success')
                    count = 0
        else:
            logger.error('File %s Not Found', cpfile)

    try:
        session.commit()
    except Exception:
        session.rollback()
        logger.error('Error during commit')
    logger.info("End Commit DI process to Database: Success")
    logger.info('Checksum for %s Files Updated', str(index))