Example #1
def main(user_args):
    pds_id = user_args.pdsid
    delete_from_di = user_args.di
    delete_from_upc = user_args.upc

    if delete_from_di:
        pds_session_maker, pds_engine = db_connect(pds_db)
        pds_session = pds_session_maker()

        query_res = pds_session.query(Files).filter(
                    Files.filename.contains(pds_id))
        num_pds_queries = len(list(query_res))

        while True:
            print(f'You will be deleting {num_pds_queries} records from the di ' +
                  f"database {credentials[pds_db]['db']}")
            user_answer = input('Are you sure?[Y/N]:')

            if user_answer == 'Y' or user_answer == 'N':
                break
            else:
                print(f'Invalid input: {user_answer}')

        if user_answer == 'Y':
            for record in query_res:
                pds_session.delete(record)
        pds_session.commit()
        pds_session.flush()
        pds_session.close()

    if delete_from_upc:
        upc_session_maker, upc_engine = db_connect(upc_db)
        upc_session = upc_session_maker()

        query_res = upc_session.query(DataFiles).filter(
            DataFiles.productid == pds_id)
        num_upc_queries = len(list(query_res))
        while True:
            print(f'You will be deleting {num_upc_queries} records from the upc ' +
                  f"database {credentials[upc_db]['db']}")
            user_answer = input('Are you sure?[Y/N]:')

            if user_answer == 'Y' or user_answer == 'N':
                break
            else:
                print(f'Invalid input: {user_answer}')

        if user_answer == 'Y':
            for record in query_res:
                upc_session.delete(record)
        upc_session.commit()
        upc_session.flush()
        upc_session.close()
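Example #1 commits, flushes, and closes its sessions by hand; later examples (#19 and #26) rely on a session_scope() helper instead. A minimal sketch of such a context manager, assuming only that db_connect returns a (sessionmaker, engine) pair as it does throughout these examples:

from contextlib import contextmanager

@contextmanager
def session_scope(session_maker):
    # Commit on success, roll back on error, always close (sketch only).
    session = session_maker()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

# Hypothetical usage with the pair returned above:
#   pds_session_maker, pds_engine = db_connect(pds_db)
#   with session_scope(pds_session_maker) as session:
#       for record in session.query(Files).filter(Files.filename.contains(pds_id)):
#           session.delete(record)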
Example #2
def main(user_args):
    date = user_args.date
    instrument = user_args.instrument
    spacecraft = user_args.spacecraft
    database = user_args.database
    target = user_args.target
    loglevel = user_args.loglevel
    statistics = user_args.statistics

    logging.basicConfig(level=loglevel)
    source_session_maker, _ = db_connect(upc_db)
    source_session = source_session_maker()

    upc_ids = get_upc_ids(source_session, date, instrument,
                          spacecraft, target)

    if statistics:
        get_stats(source_session, upc_ids)
        return

    targets = ['public_mars', 'public_moon', 'public_other']
    #targets = ['upcdev_mars', 'upcdev_moon', 'upcdev_other']

    target_sessions = {}

    for target in targets:
        Session, _ = db_connect(target)
        session = Session()
        target_sessions[target] = session

    instrument = get_instrument(source_session, instrument,
                                spacecraft)

    for target_session in target_sessions.values():
        sync_instrument(target_session, instrument)

    instrument_keywords = get_keywords(source_session, instrument,
                                       spacecraft)
    common_keywords = get_keywords(source_session, 'COMMON')

    # Sync keywords, instruments with all databases
    for target_session in target_sessions.values():
        #sync_instrument(target_session, instrument)
        sync_keywords(target_session, instrument_keywords)
        sync_keywords(target_session, common_keywords)
        target_session.commit()

    meta_types = [MetaTime, MetaString, MetaGeometry, MetaBoolean, MetaBands,
                  MetaPrecision, MetaInteger]

    for upc_id in upc_ids:
        sync_upc_id(source_session, target_sessions, upc_id, meta_types)
Example #3
    def __init__(self,
                 process_name,
                 archive,
                 volume=None,
                 search=None,
                 log_level='INFO',
                 namespace=None):
        self.process_name = process_name
        self.archive = archive
        self.logger = self.get_logger(log_level)
        self.archive_info = json.load(open(pds_info, 'r'))
        try:
            self.archive_id = self.get_archive_att('archiveid')
        except KeyError:
            self.logger.error("Archive %s not found in %s", archive, pds_info)
            raise
        self.volume = volume
        self.search = search
        self.namespace = namespace
        self.ready_queue = RedisQueue(f"{process_name}_ReadyQueue", namespace)
        self.error_queue = RedisQueue(f"{process_name}_ErrorQueue", namespace)
        self.logger.info("%s queue: %s", process_name,
                         self.ready_queue.id_name)

        try:
            pds_session_maker, _ = db_connect(pds_db)
            self.logger.info('Database Connection Success')
        except Exception as e:
            self.logger.error('Database Connection Error\n\n%s', e)
            raise

        self.session_maker = pds_session_maker
Example #4
def session(tables, request):
    Session, _ = db_connect('upc_test')
    session = Session()

    def cleanup():
        session.rollback()  # Necessary because some tests intentionally fail
        for t in reversed(tables):
            # Skip the srid table
            if t != 'spatial_ref_sys':
                session.execute(f'TRUNCATE TABLE {t} CASCADE')
            # Reset the autoincrementing
            if t in ['datafiles', 'instruments', 'targets']:
                if t == 'datafiles':
                    column = f'{t}_upcid_seq'
                if t == 'instruments':
                    column = f'{t}_instrumentid_seq'
                if t == 'targets':
                    column = f'{t}_targetid_seq'

                session.execute(f'ALTER SEQUENCE {column} RESTART WITH 1')
        session.commit()

    request.addfinalizer(cleanup)

    return session
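A hypothetical test consuming the session fixture above (pytest injects it by argument name); the models.Targets.create call mirrors Example #18, while the test itself is purely illustrative:

def test_insert_target(session):
    # Runs against the live 'upc_test' database; the fixture's cleanup()
    # truncates the table again once the test finishes.
    models.Targets.create(session,
                          targetname='FAKE_TARGET',
                          system='FAKE_SYSTEM')
    assert session.query(models.Targets).count() == 1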
Example #5
def main():
    PDS_info = json.load(open(pds_info, 'r'))
    reddis_queue = RedisQueue('DI_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
    except Exception as e:
        print(e)
        return 1

    for target in PDS_info:
        archive_id = PDS_info[target]['archiveid']
        td = (datetime.datetime.now(pytz.utc) -
              datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")
        expired = archive_expired(session, archive_id, testing_date)
        # If any files within the archive are expired, send them to the queue
        if expired.count():
            # @TODO get rid of print statements or enable with --verbose?
            for f in expired:
                reddis_queue.QueueAdd((f.filename, target))
            print('Archive {} DI Ready: {} Files'.format(
                target, str(expired.count())))
        else:
            print('Archive {} DI Current'.format(target))
    return 0
Example #6
def main():

    #    pdb.set_trace()

    args = Args()
    args.parse_args()

    RQ = RedisQueue('ChecksumUpdate_Queue')

    # @TODO Remove/replace "archiveID"
    archiveID = {
        'cassiniISS': 'cassini_iss_edr',
        'mroCTX': 16,
        'mroHIRISE_EDR': '124',
        'LROLRC_EDR': 74
    }

    # ********* Set up logging *************
    logger = logging.getLogger('ChecksumUpdate_Queueing.' + args.archive)
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s Checksum update Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive],
            Files.filename.like(volstr))
    else:
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive])
    addcount = 0
    for element in QueryOBJ:
        try:
            RQ.QueueAdd(element.filename)
            addcount = addcount + 1
        except:
            logger.error('File %s Not Added to DI_ReadyQueue',
                         element.filename)

    logger.info('Files Added to Queue %s', addcount)

    logger.info('DI Queueing Complete')
Example #7
    def __init__(self, database):
        """
        Parameters
        ----------
        database : str
        """
        if database == "JOBS":
            self.session_maker, self.engine = db_connect(cluster_db)
            Base = automap_base()
            Base.prepare(self.engine, reflect=True)
            self.processingTAB = Base.classes.processing
        elif database == "DI":
            base = automap_base()
            self.session_maker, _ = db_connect(pds_db)
            self.files = Files
            self.archives = Archives
            self.DB_files = self.files
Example #8
def create_pds_database():
    try:
        Session, engine = db_connect(pds_db)
    except:
        Session = None
        engine = None

    if isinstance(Session, sqlalchemy.orm.sessionmaker):

        # Create the database
        if not database_exists(engine.url):
            create_database(engine.url, template='template_postgis'
                            )  # This is a hardcode to the local template

        Base.metadata.bind = engine
        # If the table does not exist, this will create it. This is used in case a
        # user has manually dropped a table so that the project is not wrecked.
        Base.metadata.create_all(tables=[
            ProcessRuns.__table__, Files.__table__, Archives.__table__
        ])
        with open(pds_info, 'r') as fp:
            data = json.load(fp)

        i = 0
        archive_list = []
        archive_ids = []
        unsupported_args = [
            'bandbinQuery', 'FilterName', 'upc_reqs', 'mission', 'bandorder'
        ]
        for key, value in data.items():
            value['archive_name'] = key
            value['missionid'] = i
            value['pds_archive'] = True
            value['primary_node'] = 'USGS'
            value.pop('path')

            for key in unsupported_args:
                try:
                    value.pop(key)
                except:
                    pass

            if value['archiveid'] not in archive_ids:
                archive_ids.append(value['archiveid'])
                archive_list.append(Archives(**value))
            i += 1

        session = Session()
        try:
            session.add_all(archive_list)
            session.commit()
        except sqlalchemy.exc.IntegrityError as e:
            print(
                "Not adding new archives to Archives table, as the table is likely "
                + "already populated. This a result of the following: \n\n{}".
                format(e))
        session.close()
Example #9
def main():

    #    pdb.set_trace()
    args = Args()
    args.parse_args()

    logger = logging.getLogger('UPC_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    # logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Process.log')
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    RQ = RedisQueue('UPC_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
        print('Database Connection Success')
    except Exception as e:
        print(e)
        print('Database Connection Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[args.archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, args.archive))
            addcount = addcount + 1

        logger.info('Files Added to UPC Queue: %s', addcount)

    print("Done")
Example #10
def main(user_args):
    log_level = user_args.log_level

    PDS_info = json.load(open(pds_info, 'r'))
    reddis_queue = RedisQueue('UPC_ReadyQueue')
    logger = logging.getLogger('UPC_Queueing')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("UPC Queue: %s", reddis_queue.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
    except Exception as e:
        logger.error("%s", e)
        return 1

    # For each archive in the db, test if there are files that are ready to
    #  process
    for archive_id in session.query(Files.archiveid).distinct():
        result = session.query(Files).filter(Files.archiveid == archive_id,
                                             Files.upc_required == 't')

        # Get filepath from archive id
        archive_name = session.query(Archives.archive_name).filter(
            Archives.archiveid == archive_id).first()

        # No archive name = no path.  Skip these values.
        if (archive_name is None):
            logger.warn("No archive name found for archive id: %s", archive_id)
            continue
        try:
            # Since results are returned as lists, we have to access the 0th
            #  element to pull out the string archive name.
            fpath = PDS_info[archive_name[0]]['path']
        except KeyError:
            logger.warn("Unable to locate file path for archive id %s",
                        archive_id)
            continue

        # Add each file in the archive to the redis queue.
        for element in result:
            fname = fpath + element.filename
            fid = element.fileid
            reddis_queue.QueueAdd((fname, fid, archive_name[0]))

        logger.info("Added %s files from %s", result.count(), archive_name)
    return 0
Example #11
def main():
    path = cfg.pow_map2_base
    session, _ = db_connect('clusterjob_prd')
    id2task = map_type_ids(session)
    try:
        n_days = sys.argv[1]
    except IndexError:
        n_days = 14
    old_files = get_old_keys(session)
    remove(path, old_files, id2task, session)
    return 0
Example #12
def main():

    args = Args()
    args.parse_args()

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    logger = logging.getLogger('DI_Ready.' + args.archive)
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    try:
        # Throws away 'engine' information
        session, _ = db_connect(pds_db)
        logger.info("%s", args.archive)
        logger.info("Database Connection Success")
    except Exception as e:
        logger.error("%s", e)
    else:
        # if db connection fails, there's no sense in doing this part
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

        if args.volume:
            expired = volume_expired(session, archiveID, args.volume, testing_date)
            if expired.count():
                logger.info("Volume %s DI Ready: %s Files", args.volume,
                            str(expired.count()))
            else:
                logger.info('Volume %s DI Current', args.volume)
        else:
            expired = archive_expired(session, archiveID, testing_date)
            if expired.count():
                logger.info('Archive %s DI Ready: %s Files', args.archive,
                            str(expired.count()))
            else:
                logger.info('Archive %s DI Current', args.archive)
Example #13
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    search = user_args.search
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[archive]['archiveid']

    RQ = RedisQueue('Browse_ReadyQueue')
    logger.info("Browse Queue: %s", RQ.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except:
        logger.error('Database Connection Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, archive))
            addcount = addcount + 1

        logger.info('Files Added to Browse Queue: %s', addcount)
Example #14
def main(user_args):
    if user_args.path:
        path = user_args.path
    else:
        path = summaries_path

    Session, _ = db_connect(upc_db)
    session = Session()

    for key in queries:
        json_query = "with t AS ({}) SELECT json_agg(t) FROM t;".format(queries[key])
        output = session.execute(json_query)
        json_output = json.dumps([dict(line) for line in output])

        with open(path + key + ".json", "w") as json_file:
            json_file.write(json_output)
Example #15
def main():

    #    pdb.set_trace()
    args = Args()
    args.parse_args()

    logger = logging.getLogger('Thumbnail_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[args.archive]['archiveid']

    RQ = RedisQueue('Thumbnail_ReadyQueue')

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except:
        logger.error('Database Connection Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')
    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[args.archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, args.archive))
            addcount = addcount + 1

        logger.info('Files Added to Thumbnail Queue: %s', addcount)
Example #16
def main():

    args = Args()
    args.parse_args()

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    try:
        # Throws away 'engine' information
        session, _ = db_connect(pds_db)
        print(args.archive)
        print('Database Connection Success')
    except Exception as e:
        print(e)
    else:
        # if db connection fails, there's no sense in doing this part
        td = (datetime.datetime.now(pytz.utc) -
              datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

        if args.volume:
            expired = volume_expired(session, archiveID, args.volume,
                                     testing_date)
            if expired.count():
                print('Volume {} DI Ready: {} Files'.format(
                    args.volume, str(expired.count())))
            else:
                print('Volume {} DI Current'.format(args.volume))
        else:
            expired = archive_expired(session, archiveID, testing_date)
            if expired.count():
                print('Archive {} DI Ready: {} Files'.format(
                    args.archive, str(expired.count())))
            else:
                print('Archive {} DI Current'.format(args.archive))
Example #17
def main():
    PDS_info = json.load(open(pds_info, 'r'))
    reddis_queue = RedisQueue('UPC_ReadyQueue')

    try:
        # Safe to use prd database here because there are no writes/edits.
        session, _ = db_connect(pds_db)

    # @TODO Catch exceptions by type.  Bad practice to 'except Exception,' but
    #   I don't know what exception could happen here.
    except Exception as e:
        print(e)
        return 1

    # For each archive in the db, test if there are files that are ready to
    #  process
    for archive_id in session.query(Files.archiveid).distinct():
        result = session.query(Files).filter(Files.archiveid == archive_id,
                                             Files.upc_required == 't')

        # Get filepath from archive id
        archive_name = session.query(Archives.archive_name).filter(
            Archives.archiveid == archive_id).first()

        # No archive name = no path.  Skip these values.
        if (archive_name is None):
            # @TODO log an error
            continue

        try:
            # Since results are returned as lists, we have to access the 0th
            #  element to pull out the string archive name.
            fpath = PDS_info[archive_name[0]]['path']
        except KeyError:
            continue

        # Add each file in the archive to the redis queue.
        for element in result:
            fname = fpath + element.filename
            fid = element.fileid
            reddis_queue.QueueAdd((fname, fid, archive_name[0]))
    return 0
Example #18
def session(tables, request):
    Session, _ = db_connect('upc_test')
    session = Session()

    models.Targets.create(session,
                          targetname='FAKE_TARGET',
                          system='FAKE_SYSTEM')
    models.Instruments.create(session,
                              instrument='FAKE_CAMERA',
                              spacecraft='FAKE_CRAFT')
    models.DataFiles.create(session,
                            instrumentid=1,
                            targetid=1,
                            source='/Path/to/pds/file.img')
    models.SearchTerms.create(session, upcid=1, instrumentid=1, targetid=1)
    models.JsonKeywords.create(session, upcid=1)

    session.commit()
    session.flush()

    def cleanup():
        session.rollback()  # Necessary because some tests intentionally fail
        for t in reversed(tables):
            # Skip the srid table
            if t != 'spatial_ref_sys':
                session.execute(f'TRUNCATE TABLE {t} CASCADE')
            # Reset the autoincrementing
            if t in ['datafiles', 'instruments', 'targets']:
                if t == 'datafiles':
                    column = f'{t}_upcid_seq'
                if t == 'instruments':
                    column = f'{t}_instrumentid_seq'
                if t == 'targets':
                    column = f'{t}_targetid_seq'

                session.execute(f'ALTER SEQUENCE {column} RESTART WITH 1')
        session.commit()

    request.addfinalizer(cleanup)

    return session
Example #19
def main():
    Session, _ = db_connect(upc_db)

    path = summaries_path

    with session_scope(Session) as session:
        print("Creating Hist Table")
        session.execute(query)
        histogram_qobj = session.query("histogram_summary")
        total_rows = session.execute(
            "SELECT count(*) FROM histogram_summary;").first()[0]
        page_number = 0
        number_of_rows_per_page = 200000
        complete_json_output = []
        print("Paging hist results")
        while True:
            lower_bound = page_number * number_of_rows_per_page
            upper_bound = (page_number *
                           number_of_rows_per_page) + number_of_rows_per_page

            if upper_bound > total_rows:
                number_of_rows_per_page = total_rows - lower_bound

            json_query = "with t AS (SELECT * FROM histogram_summary LIMIT {} OFFSET {}) SELECT json_agg(t) FROM t;".format(
                number_of_rows_per_page, lower_bound)
            output = session.execute(json_query).fetchall()
            complete_json_output.extend([dict(line) for line in output])

            page_number += 1

            if upper_bound > total_rows:
                break

        print("Finished view generation")

    print("Writing Json")
    json_output = json.dumps(complete_json_output)
    with open(path + "histogram_summary.json", "a") as json_file:
        json_file.write(json_output)
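The manual LIMIT/OFFSET bookkeeping above could be wrapped in a small generator. A sketch that assumes the same histogram_summary view and a plain SQLAlchemy session; the table name is interpolated directly, so it must be trusted, exactly as in the query above:

def page_rows(session, table, page_size=200000):
    # Yield rows one LIMIT/OFFSET page at a time (sketch only).
    offset = 0
    while True:
        rows = session.execute(
            "SELECT * FROM {} LIMIT {} OFFSET {}".format(table, page_size, offset)
        ).fetchall()
        if not rows:
            break
        yield from rows
        offset += page_size

# Hypothetical replacement for the paging loop:
#   complete_json_output = [dict(row) for row in
#                           page_rows(session, 'histogram_summary')]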
Example #20
def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('DI_Queueing')
    level = logging.getLevelName(args.log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    PDS_info = json.load(open(pds_info, 'r'))
    reddis_queue = RedisQueue('DI_ReadyQueue')

    logger.info("DI Queue: %s", reddis_queue.id_name)

    try:
        session, _ = db_connect(pds_db)
    except Exception as e:
        logger.error("%s", e)
        return 1

    for target in PDS_info:
        archive_id = PDS_info[target]['archiveid']
        td = (datetime.datetime.now(pytz.utc) -
              datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")
        expired = archive_expired(session, archive_id, testing_date)
        # If any files within the archive are expired, send them to the queue
        if expired.count():
            for f in expired:
                reddis_queue.QueueAdd((f.filename, target))
            logger.info('Archive %s DI Ready: %s Files', target,
                        str(expired.count()))
        else:
            logger.info('Archive %s DI Current', target)
    return 0
Example #21
def main():
    args = Args()
    args.parse_args()
    key = args.key

    if key is None:
        print("No key specified.\nUsage:\t python clear_timestamps.py -k <job_key>")
        exit(1)

    Session, engine = db_connect('clusterjob_prd')
    session = Session()
    record = session.query(Processing).filter(Processing.key == key).first()

    record.queued = None
    record.started = None
    record.finished = None
    record.accessed = None
    record.notified = None
    record.purged = None

    session.merge(record)
    session.flush()
    session.commit()
Example #22
class JsonKeywords(BaseMixin, Base):
    __tablename__ = "json_keywords"
    upcid = Column(Integer, ForeignKey('datafiles.upcid'), primary_key=True)
    jsonkeywords = Column(MutableDict.as_mutable(JSONB))


class_map = {
    'datafiles': DataFiles,
    'instruments': Instruments,
    'targets': Targets,
    'search_terms': SearchTerms
}

try:
    Session, engine = db_connect(upc_db)
except:
    Session = None
    engine = None

if isinstance(Session, sqlalchemy.orm.sessionmaker):

    # Create the database
    if not database_exists(engine.url):
        create_database(engine.url, template='template_postgis'
                        )  # This is a hardcode to the local template

    Base.metadata.bind = engine
    # If the table does not exist, this will create it. This is used in case a
    # user has manually dropped a table so that the project is not wrecked.
    Base.metadata.create_all(tables=[
Example #23
def main(user_args):
    log_level = user_args.log_level

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # Set up logging
    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting DI Process')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception as e:
        logger.error('Database Connection Error: %s', str(e))
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ.id_name: '1'})
    index = 0

    logger.info("DI Queue: %s", RQ.id_name)

    while int(RQ.QueueSize()) > 0 and RQ_lock.available(RQ.id_name):
        item = literal_eval(RQ.QueueGet())
        inputfile = item[0]
        archive = item[1]
        logger.debug("%s - %s", inputfile, archive)
        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception as e:
            logger.warn('Filename query failed for inputfile %s: %s', inputfile, str(e))
            continue

        archive_path = PDSinfoDICT[archive]['path']

        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            Qelement.di_pass = checksum == Qelement.checksum
            if not Qelement.di_pass:
                logger.warning('File %s checksum %s does not match the database entry checksum %s',
                               cpfile, checksum, Qelement.checksum)

            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during DI processing')
            logger.warn('File %s Not Found', cpfile)
    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception as e:
        logger.warn("Unable to commit changes to database\n\n%s", e)
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()
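The chunked MD5 computation above reappears in Example #30; factored into a helper it might look like the following sketch (standard-library hashlib only, no project-specific assumptions):

import hashlib

def md5_in_chunks(path, chunk_size=4096):
    # Hash a file in fixed-size chunks so large archive products never
    # have to be read into memory at once.
    f_hash = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            f_hash.update(chunk)
    return f_hash.hexdigest()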
Example #24
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    jobarray = user_args.jobarray
    log_level = user_args.log_level

    RQ = RedisQueue('DI_ReadyQueue')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    logger = logging.getLogger('DI_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("DI Queue: %s", RQ.id_name)

    logger.info('Starting %s DI Queueing', archive)
    if volume:
        logger.info('Queueing %s Volume', volume)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        vol_exists = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).first()
        if not vol_exists:
            print(
                f"No files exist in the database for volume \"{volume}\"."
                "  Either the volume does not exist or it has not been properly ingested.\n"
            )
            exit()

    td = (datetime.datetime.now(pytz.utc) -
          datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    if volume:
        volstr = '%' + volume + '%'
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))
    else:
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, archive))
            addcount = addcount + 1
        except:
            logger.warning('File %s Not Added to DI_ReadyQueue',
                           element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('DI Queueing Complete')
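The expiry filter above needs .is_(None) for the NULL branch: a bare Python "is None" applied to a column expression is evaluated by the interpreter (always False) and never reaches SQL. A standalone sketch of how the two spellings behave; FileStub is a hypothetical stand-in for the Files model:

from sqlalchemy import Column, Date, Integer, cast, or_
from sqlalchemy.orm import declarative_base  # SQLAlchemy 1.4+

Base = declarative_base()

class FileStub(Base):
    __tablename__ = 'files_stub'
    fileid = Column(Integer, primary_key=True)
    di_date = Column(Date)

expr = or_(cast(FileStub.di_date, Date) < '2020-01-01',
           cast(FileStub.di_date, Date).is_(None))
print(expr)  # ... OR CAST(files_stub.di_date AS DATE) IS NULL

print(cast(FileStub.di_date, Date) is None)  # False -- plain identity test, no SQL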
Example #25
        uselist=True,
        cascade="save-update, merge, delete, delete-orphan")


class Archives(BaseMixin, Base):
    __tablename__ = 'archives'
    archiveid = Column(Integer, primary_key=True, autoincrement=True)
    archive_name = Column(String(1024))
    missionid = Column(Integer)
    pds_archive = Column(Boolean)
    primary_node = Column(String(64))
    file = relationship('Files', backref='archives', uselist=False)


try:
    Session, engine = db_connect(pds_db)
except:
    Session = None
    engine = None

if isinstance(Session, sqlalchemy.orm.sessionmaker):

    # Create the database
    if not database_exists(engine.url):
        create_database(engine.url, template='template_postgis'
                        )  # This is a hardcode to the local template

    Base.metadata.bind = engine
    # If the table does not exist, this will create it. This is used in case a
    # user has manually dropped a table so that the project is not wrecked.
    Base.metadata.create_all(
Example #26
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)

    persist = user_args.persist
    log_level = user_args.log_level
    namespace = user_args.namespace

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id':slurm_array_id, 'inputfile': inputfile}
    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_UpdateQueue', namespace)
    RQ_work = RedisQueue('UPC_UpdateWorkQueue', namespace)
    logger.info("UPC Update Queue: %s", RQ_main.id_name)

    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})
    # while there are items in the redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # get a file from the queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        item_list = literal_eval(item)
        inputfile = item_list[0]
        archive = item_list[1]
        failing_command = item_list[2]
        update_type = item_list[3]
        upc_id = None

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC update')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source
        edr_source = inputfile.replace(workarea, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        try:
            session = upc_session_maker()
            session.close()
        except TypeError as e:
            logger.error("Unable to create a database session/connection to the upc database: %s", e)
            raise e

        try:
            if update_type.lower() == 'upc':

                recipe_file = recipe_base + "/" + archive + '.json'
                no_extension_inputfile = os.path.splitext(inputfile)[0]
                cam_info_file = no_extension_inputfile + '_caminfo.pvl'
                footprint_file = no_extension_inputfile + '_footprint.json'
                catlab_output = no_extension_inputfile + '_catlab.pvl'

                with open(recipe_file) as fp:
                    upc_json = json.load(fp)['upc']
                    # Attempt to get the optional search_term_mapping for the upc
                    # process
                    try:
                        search_term_mapping = upc_json['search_term_mapping']
                    except KeyError:
                        search_term_mapping = {}

                # Some datasets with attached PDS labels cause PVL to hang,
                #  so recipe includes call to dump label using `catlab`
                # If present, use the catlab output as pds_label instead of inputfile
                if os.path.exists(catlab_output):
                    pds_label = pvl.load(catlab_output)
                else:
                    pds_label = pvl.load(inputfile)

                instrument_name = get_instrument_name(pds_label)
                spacecraft_name = get_spacecraft_name(pds_label)
                target_name = get_target_name(pds_label)
                with session_scope(upc_session_maker) as session:
                    target_qobj = Targets.create(session, targetname=target_name,
                                                 displayname=target_name.title(),
                                                 system=target_name)
                    target_id = target_qobj.targetid



                with session_scope(upc_session_maker) as session:
                    instrument_qobj = Instruments.create(session, instrument=instrument_name,
                                                         spacecraft=spacecraft_name)
                    instrument_id = instrument_qobj.instrumentid

                ######## Generate DataFiles Record ########
                datafile_attributes = create_datafiles_atts(pds_label, edr_source, no_extension_inputfile + '.cub')

                datafile_attributes['instrumentid'] = instrument_id
                datafile_attributes['targetid'] = target_id

                with session_scope(upc_session_maker) as session:
                    datafile_qobj = DataFiles.create(session, **datafile_attributes)
                    upc_id = datafile_qobj.upcid

                ######## Generate SearchTerms Record ########
                search_term_attributes = create_search_terms_atts(cam_info_file, upc_id,
                                                                  no_extension_inputfile + '.cub',
                                                                  footprint_file, search_term_mapping)

                search_term_attributes['targetid'] = target_id
                search_term_attributes['instrumentid'] = instrument_id

                with session_scope(upc_session_maker) as session:
                    SearchTerms.create(session, **search_term_attributes)

                ######## Generate JsonKeywords Record ########
                json_keywords_attributes = create_json_keywords_atts(cam_info_file, upc_id, inputfile,
                                                                     failing_command, logger)

                with session_scope(upc_session_maker) as session:
                    JsonKeywords.create(session, **json_keywords_attributes)

            # Derived Processing:

            # If we don't have a upcid, get the matching ID from the database
            if not upc_id:
                with session_scope(upc_session_maker) as session:
                    src = inputfile.replace(workarea, web_base)
                    datafile = session.query(DataFiles).filter(or_(DataFiles.source == src,
                                                                   DataFiles.detached_label == src)).first()
                    if not datafile:
                        RQ_error.QueueAdd(f'No matching upcid was found for {inputfile}, '
                                          'derived product paths could not be added')
                        logger.warning(f'No matching upcid was found for %s, '
                                       'derived product paths could not be added', inputfile)
                    upc_id = datafile.upcid

            final_path = makedir(inputfile)
            src = os.path.splitext(inputfile)[0]
            derived_product = os.path.join(final_path, os.path.splitext(os.path.basename(inputfile))[0])

            # If derived products exist, copy them to the derived area and add the path to the db
            try:
                shutil.move(src + '.browse.jpg', derived_product + '.browse.jpg')
                shutil.move(src + '.thumbnail.jpg', derived_product + '.thumbnail.jpg')
                add_url(derived_product, upc_id, upc_session_maker)
            except FileNotFoundError:
                RQ_error.QueueAdd(f'Unable to locate or access derived products for {inputfile}')
                logger.warning(f'Unable to locate or access derived products for %s', inputfile)

            if not persist:
                # Remove all files file from the workarea except for the copied
                # source file
                file_prefix = os.path.splitext(inputfile)[0]
                workarea_files = glob(file_prefix + '*')
                # os.remove(os.path.join(workarea, 'print.prt'))
                for file in workarea_files:
                    os.remove(file)

        # Handle SQL specific database errors
        except SQLAlchemyError as e:
            logger.error("Database operation failed: %s \nRequeueing (%s, %s)", e, inputfile, archive)
            RQ_main.QueueAdd((inputfile, archive, failing_command, update_type))
            raise e

        RQ_work.QueueRemove(item)

        # Disconnect from the engines
        upc_engine.dispose()
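Examples #23, #26, #28, and #30 all parse queue items with literal_eval, which implies that the producer side (Examples #9, #10, #13, and others) pushes a tuple whose repr is what ends up in Redis. A self-contained sketch of that round trip, with the RedisQueue class itself left out:

from ast import literal_eval

# Producer side, as in RQ.QueueAdd((fname, fid, archive)): the stored
# value is the tuple's repr (the path here is made up).
stored = repr(('/path/to/archive/volume/image.IMG', 42, 'mroCTX'))

# Consumer side, as in literal_eval(RQ_main.QueueGet()):
fname, fid, archive = literal_eval(stored)
assert archive == 'mroCTX'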
Example #27
def session_maker(tables, request):
    Session, _ = db_connect('upc_test')
    return Session
Example #28
def main(user_args):
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('Browse_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    pds_session_maker, pds_engine = db_connect(pds_db)
    pds_session = pds_session_maker()
    upc_session_maker, upc_engine = db_connect(upc_db)
    upc_session = upc_session_maker()

    tid = get_tid('fullimageurl', upc_session)

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)
            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recipeOBJ.addMissionJson(archive, 'reduced')

            infile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Binput.cub'
            outfile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Boutput.cub'
            status = 'success'
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    logger.error("Error processing %s", inputfile)
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        label = pvl.load(infile)
                        bands = PDSinfoDICT[archive]['bandorder']
                        query_bands = label['IsisCube']['BandBin'][
                            PDSinfoDICT[archive]['bandbinQuery']]
                        # Create a set from the list / single value
                        try:
                            query_band_set = set(query_bands)
                        except:
                            query_band_set = set([query_bands])

                        # Iterate through 'bands' and grab the first value that is present in the
                        #  set defined by 'bandbinquery' -- if not present, default to 1
                        exband = next(
                            (band for band in bands if band in query_band_set),
                            1)

                        band_infile = infile + '+' + str(exband)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)

                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)

                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions'][
                            'Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions'][
                            'Samples']
                        Nline = int(Nline)
                        Nsample = int(Nsample)
                        Sfactor = scaleFactor(Nline, Nsample, recip_json)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    elif item == 'isis2std':
                        final_outfile = finalpath + '/' + os.path.splitext(
                            os.path.basename(inputfile))[0] + '.browse.jpg'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', final_outfile)

                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                isisSerial = getISISid(infile)
                        except ProcessError as e:
                            print(e)
                            logger.error('Process %s :: Error', k)
                            status = 'error'
            if status == 'success':
                DB_addURL(upc_session, isisSerial, final_outfile, tid)
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
                AddProcessDB(pds_session, fid, 't')
        else:
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during browse processing'
            )
            logger.error('File %s Not Found', inputfile)

    upc_session.close()
    pds_session.close()
    upc_engine.dispose()
    pds_engine.dispose()
Example #29
def tables():
    _, engine = db_connect('upc_test')
    return engine.table_names()
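Engine.table_names() is deprecated in recent SQLAlchemy releases (and removed in 2.0); an equivalent fixture written against the inspector API would look like this sketch, with the same 'upc_test' connection assumed:

import sqlalchemy

def tables():
    _, engine = db_connect('upc_test')
    return sqlalchemy.inspect(engine).get_table_names()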
Example #30
def main(user_args):
    log_level = user_args.log_level
    override = user_args.override

    logger = logging.getLogger('Ingest_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    print("Log File: {}Ingest.log".format(pds_log))
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Ingest Process")
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})
    RQ_work = RedisQueue('Ingest_WorkQueue')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')
        return 1

    index = 1

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):

        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        archive = item[1]

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during ingest processing'
            )
            logger.warn("%s is not a file\n", inputfile)
            continue

        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None or filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile,' flag for upc
            try:
                upcflag = all(req in inputfile
                              for req in PDSinfoDICT[archive]['upc_reqs'])
            except KeyError:
                logger.warn(
                    "No upc_reqs found for %s\nSetting upc eligibility False for all related files.",
                    str(archive))
                upcflag = False

            filesize = os.path.getsize(inputfile)

            try:
                ingest_entry = Files()

                if QOBJ is not None and override:
                    ingest_entry.fileid = QOBJ.fileid

                ingest_entry.archiveid = PDSinfoDICT[archive]['archiveid']
                ingest_entry.filename = subfile
                ingest_entry.entry_date = date
                ingest_entry.checksum = filechecksum
                ingest_entry.upc_required = upcflag
                ingest_entry.validation_required = True
                ingest_entry.header_only = False
                ingest_entry.release_date = date
                ingest_entry.file_url = fileURL
                ingest_entry.file_size = filesize
                ingest_entry.di_pass = True
                ingest_entry.di_date = date

                session.merge(ingest_entry)
                session.flush()

                RQ_work.QueueRemove(inputfile)

                index = index + 1

            except Exception as e:
                logger.error("Error During File Insert %s : %s", str(subfile),
                             str(e))

        elif not runflag and not override:
            RQ_work.QueueRemove(inputfile)
            logger.warn(
                "Not running ingest: file %s already present"
                " in database and no override flag supplied", inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception as e:
                session.rollback()
                logger.warn("Unable to commit to database: %s", str(e))
    else:
        logger.info("No Files Found in Ingest Queue")
        try:
            session.commit()
            logger.info("Commit to Database: Success")
        except Exception as e:
            logger.error("Unable to commit to database: %s", str(e))
            session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")