Example #1
def readDB(db, calcId, cols):
    with sqlConnect(db) as con:
        cur = con.cursor()
        cur.execute("SELECT geomid,results from Calc where CalcId=?",
                    (calcId, ))
        CalcRow = [[i] + j.split() for i, j in cur]
        try:
            CalcRow = np.array(CalcRow, dtype=np.float64)
        except ValueError:  # something wrong with the result files
            ind = len(CalcRow[0])
            for i in CalcRow:
                if ind != len(i):
                    raise Exception(
                        "Some thing wrong with the result of GeomID %s" % i[0])

        cur.execute("SELECT id,%s from Geometry" % cols)
        GeomRow = np.array(cur.fetchall())

    # indices in GeomRow corresponding to the Ids in CalcRow
    sortedIndex = np.searchsorted(GeomRow[:, 0],
                                  CalcRow[:, 0],
                                  sorter=GeomRow[:, 0].argsort())
    # each row of resArr  contains [rho, phi, results...]
    resArr = np.column_stack([GeomRow[sortedIndex][:, 1:], CalcRow[:, 1:]])
    # sort the jumbled rho, phi values and remove any duplicates in the process; be careful!
    resArr = np.unique(resArr, axis=0)
    return resArr
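
A minimal usage sketch (an assumption, not part of the source): read the results for a hypothetical CalcId 2 with hypothetical geometry columns rho,phi from a hypothetical pesman.db and save them to a text file.

import numpy as np

resArr = readDB('pesman.db', 2, 'rho,phi')       # hypothetical db file, calc id and column list
np.savetxt('results.dat', resArr, fmt='%15.8f')  # each row: rho, phi, results...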
Example #2
def deleteCalcs(dB, pesDir, calcId, geomIdList):
    geomStr = ' where calcid = %s and geomid in ('%calcId + ','.join(map(str, geomIdList)) + ')'

    with sqlConnect(dB) as con:
        cur = con.cursor()

        cur.execute('select geomid from expcalc' + geomStr)
        geomList = cur.fetchall()
        for (geomId,) in geomList:
            cur.execute('delete from expcalc where geomid = ? and calcid = ?',(geomId,calcId))
            print("CalcId = {}, GeomId = {} deleted from ExpCalc Table".format(calcId,geomId))

        cur.execute('select type from CalcInfo where id=?',(calcId,))
        calcName = cur.fetchone()[0]
        cur.execute('select geomid from calc' + geomStr)
        geomList = cur.fetchall()
        for (geomId,) in geomList:
            dirToRemove = '{}/geom{}/{}{}'.format(pesDir, geomId,calcName, calcId)
            if os.path.isdir(dirToRemove): # geomdata is in folder format
                shutil.rmtree(dirToRemove)
            elif os.path.isfile(dirToRemove+'.tar.bz2'): # if it is archived
                os.remove(dirToRemove+'.tar.bz2')
            else:
                print("No GeomData found for CalcId = {}, GeomId = {}".format(calcId,geomId) )
            cur.execute('delete from calc where geomid = ? and calcid = ?',(geomId,calcId))
            print("CalcId = {}, GeomId = {} deleted from CalcTable and GeomData".format(calcId,geomId))
Example #3
def resetCalcs(db):
    # deleteCalcs for all calculations 
    # clear geomdata 
    # clear expdir and rundir
    with sqlConnect(db) as con:
        cur = con.cursor()
        print("Clearing up tables Calc, ExpCalc, Exports")
        cur.execute("delete from Calc")
        cur.execute("delete from ExpCalc")
        cur.execute("delete from Exports")
    print("Deleting directories GeomData, ExpDir, RunDir")
    shutil.rmtree('GeomData')
    shutil.rmtree('ExpDir')
    shutil.rmtree('RunDir')
Example #4
def GetExpMrciNactJobs(dB, calcId, jobs, gidList, constDb):

    with sqlConnect(dB) as con:
        cur = con.cursor()
        # WARNING: the constDb constraint is turned off here; it's not used much anyway
        sqlQuery = '''SELECT GeomId,Id FROM Calc 
                    WHERE CalcId = 1 and 
                    GeomId not in (SELECT GeomId FROM Calc WHERE CalcId={0} UNION SELECT GeomId FROM ExpCalc WHERE CalcId={0}) 
                    {2} LIMIT {1}'''.format(
            calcId, jobs, ' and GeomId in ({})'.format(','.join(
                map(str, gidList))) if gidList else '')

        cur.execute(sqlQuery)
        expGClist = cur.fetchall()

    assert expGClist, "No Exportable geometries found"  # preventing null exports
    return expGClist
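
For illustration (hypothetical db file and arguments): with calcId=2, jobs=5 and an empty gidList, the formatted query selects up to 5 geometries that already have a CalcId=1 row in Calc but no CalcId=2 row in Calc or ExpCalc.

# the query then reads (whitespace compacted):
#   SELECT GeomId,Id FROM Calc
#   WHERE CalcId = 1 and
#   GeomId not in (SELECT GeomId FROM Calc WHERE CalcId=2 UNION SELECT GeomId FROM ExpCalc WHERE CalcId=2)
#   LIMIT 5
expGClist = GetExpMrciNactJobs('pesman.db', 2, 5, [], None)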
Example #5
def successiveExport(dB, calcId, jobs, gidList, constDb):
    with sqlConnect(dB) as con:
        cur = con.cursor()

        # get all geoms that are left in the ExpCalc table for the previous calc id (greater than 1)
        # if a geom is still in ExpCalc (for CalcId > 1) then its CalcId=1/mcscf calculation was obviously successful.
        sqlQuery = '''SELECT GeomId,Id FROM Calc 
                    WHERE CalcId = 1 and
                    GeomId in (SELECT GeomId from ExpCalc where CalcId={0}) and 
                    GeomId not in (SELECT GeomId FROM Calc WHERE CalcId={1} UNION SELECT GeomId FROM ExpCalc WHERE CalcId={1}) 
                    {3} LIMIT {2}'''.format(
            calcId - 1, calcId, jobs, ' and GeomId in ({})'.format(','.join(
                map(str, gidList))) if gidList else '')

        cur.execute(sqlQuery)
        expGClist = cur.fetchall()

    assert expGClist, "No Exportable geometries found"  # preventing null exports
    return expGClist
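
A hedged usage sketch with hypothetical arguments; for calcId=3 this picks geometries still sitting in ExpCalc for CalcId 2 that have no CalcId 3 entry yet in Calc or ExpCalc.

expGClist = successiveExport('pesman.db', 3, 5, [], None)  # hypothetical db file, calc id and job count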
Example #6
def status(dB):
    with sqlConnect(dB) as con:
        cur = con.cursor()
        status = '-'*90+'\033[31m\n\033[5m\033[4mPESMan Status:\033[0m\t\t'
        cur.execute('select count(id) from Geometry')
        status+= '\033[35m\033[4mTotal number of geometries: {}\033[0m\n'.format(cur.fetchone()[0])
        cur.execute('select type from CalcInfo')
        names = [i[0] for i in cur]
        if len(names)==0:
            status+='{0}{1}{0}'.format('='*90,'\n\t\tNo Calcs are available\n')
            print(status)
            return
        status += "{0}\n{1:^10}|{2:^13}|{3:^20}|{4:^20}|{5:^20}\n{0}".format(
                '='*90,'CalcId','CalcName','Exported Jobs No.','Imported Jobs No.', 'Jobs in ExpCalc')
        for i,name in enumerate(names, start=1):
            cur.execute('select sum(NumCalc) from Exports where calcid=?',(i,))
            tE = cur.fetchone()[0]
            tE = tE if tE else 0
            cur.execute('select count(*) from calc where calcid=?',(i,))
            tD = cur.fetchone()[0]
            cur.execute('select count(*) from ExpCalc where calcid=?',(i,))
            tEx = cur.fetchone()[0]
            status +="\n{:^10}|{:^13}|{:^20}|{:^20}|{:^20}\n{}".format(i,name,tE,tD,tEx,'-'*90)
        print(status)

        # Check for SemiCalc information 
        cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [i for i, in cur.fetchall()]
        if "SemiCalc" in tables:
            cur.execute("SELECT count(*) from SemiCalc;")
            print("No. of results in SemiCalc table {}".format(cur.fetchone()[0]))

        status = "\033[31m\n\033[4mDirectory Stats:\033[0m\t\t\n" + "="*90 + "\n"
        subDirs = [x for x in os.listdir('.') if os.path.isdir(x)]
        for s in subDirs:
            status += " {:<13} ---  {}\n".format(s,size(s))
        status += " {:<13} ---  {}\n".format('Total',size('.')) + "-"*90 
        # for s in subDirs: # also prints number of files and folders
        #   status += " {:<13} ---  {}  ({:>3} folders & {:>3} files)\n".format(s,size(s),folders(s),files(x))
        # status += " {:<13} ---  {}  ({:>3} folders & {:>3} files)\n".format('Total',size('.'),folders('.'),files('.')) + "-"*90
        print(status)
        lastM = time.ctime(os.path.getmtime(os.path.expanduser(dB)))
        print("Database last modified on : {} \n".format(lastM))
Example #7
def ExportJobs(dB, calcId, jobs, np, exportDir, pesDir, templ, gidList,
               sidList, depth, constDb, includePath, molInfo, par, logger):
    # Main export function that exports a given number of jobs for a specified CalcId type.
    # The following collects the GeomIds that are exportable and the Calc table Id that will be used as their start info.
    if calcId == 1:  # primary mcscf export using nearest-neighbour start geometries
        ExpGeomList = GetExpGeomNearNbr(dB, calcId, gidList, sidList, jobs,
                                        depth, constDb, includePath)
    elif calcId == 2:  # mrci or nact export
        ExpGeomList = GetExpMrciNactJobs(dB, calcId, jobs, gidList, constDb)
    else:  # successive export for higher CalcIds
        # raise Exception('No calculation is defined for CalcId=', calcId)
        ExpGeomList = successiveExport(dB, calcId, jobs, gidList, constDb)

    with sqlConnect(dB) as con:
        con.row_factory = sqlRow
        cur = con.cursor()

        cur.execute('SELECT * from CalcInfo WHERE Id=?', (calcId, ))
        InfoRow = cur.fetchone()
        assert InfoRow, "No Info for CalcId={} found in data base".format(
            calcId)

        calcName, template = InfoRow['Type'], InfoRow['InpTempl']  # unpack calc name and the input template from the info table

        # insert into database one row and use the id as export id
        cur.execute("INSERT INTO Exports (CalcId) VALUES (?)", (calcId, ))
        exportId = cur.lastrowid

        expDir = "{}/Export{}-{}{}".format(exportDir, exportId, calcName,
                                           calcId)
        # remove the export directory if it already exists; it may have been left over from a failed export.
        if os.path.exists(expDir):
            shutil.rmtree(expDir)  #<<<--- shouldn't have happened
        os.makedirs(expDir)

        if templ:  # if a template file is given use that, otherwise use the one from the CalcInfo table
            with open(templ, 'r') as f:
                template = f.read()

        jobs = []
        for ind, (geomId, startId) in enumerate(ExpGeomList, start=1):
            cur.execute('SELECT * from Geometry WHERE Id=?', (geomId, ))
            geom = dict(cur.fetchone())  # row as a dict, parsed into the geometry input later

            if startId:
                cur.execute('SELECT GeomId,Dir from Calc WHERE Id=?',
                            (startId, ))
                startGId, startDir = cur.fetchone()
            else:
                startGId, startDir = 0, None

            jobs.append([
                geomId, geom, calcId, expDir, template, calcName, startGId,
                startDir, ind, logger.info if np == 1 else print
            ])
        # The logging module, though thread safe, can't write from multiple processes, so with multiprocessing a simple print is used.
        # This could be handled properly with an explicit multiprocessing queue, but I didn't want to make it all that complicated.
        # That means in the parallel case the statements inside ExportCalc won't be logged to PESMan.log, only printed to the console.
        if np == 1:
            expDirs = [ExportCalc(i) for i in jobs]
        else:  # if parallel export is requested
            pool = Pool(processes=np)
            expDirs = pool.map(ExportCalc, jobs)
            pool.close()

        # update the export table and expcalc tables with the exported jobs
        expGeomIds = [i for i, _ in ExpGeomList]
        cur.execute(
            "UPDATE Exports SET NumCalc=?, ExpDT=strftime('%H:%M:%S %d-%m-%Y', datetime('now', 'localtime')), \
                    ExpGeomIds=? WHERE Id=?",
            (len(expDirs), ' '.join(map(str, expGeomIds)), exportId))

        lExpCalc = [(exportId, calcId, i, j)
                    for i, j in zip(expGeomIds, expDirs)]
        cur.executemany(
            "INSERT INTO ExpCalc (ExpId,CalcId,GeomId,CalcDir) VALUES (?,?,?,?)",
            lExpCalc)

        fExportDat = expDir + "/export.dat"  # save the export id and exported directories in export.dat file
        with open(fExportDat, 'w') as f:
            f.write("# Auto generated file. Please do not modify\n" +
                    ' '.join(map(str, [exportId] + expDirs)))

        os.chmod(
            fExportDat, 0o444
        )  # change mode of this file to read-only to prevent accidental writes

        fPythonFile = "{}/RunJob{}.py".format(
            expDir, exportId)  # save the python file that will run the jobs

        if par:  # export a job that runs geometries in parallel
            createRunJobParallel(molInfo, fPythonFile)
        else:  # run one geometry at a time, mandatory for jobs like the primary mcscf
            createRunJob(molInfo, fPythonFile)  # use only this if you want no parallel geometry runs

        logger.info(
            "PESMan export successful: Id {} with {} job(s) exported\n".format(
                exportId, len(ExpGeomList)))
        return expDir, exportId, expDirs
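
A hedged usage sketch of a serial export with hypothetical arguments; in practice the values come from the PESMan config and command line, and molInfo is the molecule/run info dict consumed by createRunJob.

import logging

logger = logging.getLogger('PESMan')
molInfo = {}  # hypothetical placeholder; real contents depend on createRunJob/createRunJobParallel
expDir, exportId, expDirs = ExportJobs(
    'pesman.db',          # database file (hypothetical)
    1,                    # CalcId to export
    10,                   # number of jobs
    1,                    # np=1, serial export
    'ExpDir', 'GeomData',
    None,                 # no template file, use the one from CalcInfo
    [], [],               # no explicit geom id / start id lists
    None,                 # no depth limit for the neighbour search
    '',                   # no extra SQL constraint
    False,                # includePath=False, exclude pathological geometries
    molInfo,
    False,                # par=False, one geometry per job
    logger)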
Example #8
def ImportJobs(dB, np, expFile, pesDir, iGl, isDel, isZipped, logger):
    # imports jobs from a given export.dat file

    exportDir = os.path.abspath(
        os.path.dirname(expFile))  # get the main export directory
    exportId = re.findall(
        r'Export(\d+)-',
        exportDir)[0]  # get the export id from the directory name
    # so the export.dat file itself is not really needed to import a job; only the directory path is required

    with sqlConnect(dB) as con:
        cur = con.cursor()
        cur.execute('SELECT Status FROM Exports WHERE Id=?',
                    (exportId, ))  # check if the export id is open for import
        exp_row = cur.fetchone()
        assert exp_row, "Export Id = {} not found in data base".format(
            exportId)
        assert exp_row[0] == 0, "Export Id = {} is already closed.".format(
            exportId)

        jobs, geomIds = [], []
        # now obtain list of jobs which can be imported.
        cur.execute("SELECT GeomId,CalcDir FROM ExpCalc where ExpId=?",
                    (exportId, ))
        for geomId, calcDir in cur.fetchall():
            dirFull = "{}/{}".format(exportDir, calcDir)  # ab initio directory
            calcFile = "{0}/{1}/{1}.calc".format(exportDir,
                                                 calcDir)  # calcfile name
            if os.path.isfile(calcFile):  # is this job successful?

                jobs.append([
                    dirFull, calcFile, pesDir, iGl, isZipped, isDel,
                    logger.info if np == 1 else print
                ])
                geomIds.append(geomId)

        if np == 1:
            res = [ImportCalc(i) for i in jobs]
        else:  # if parallel import is requested
            pool = Pool(processes=np)
            res = pool.map(ImportCalc, jobs)
            pool.close()

        cur.executemany(
            "INSERT INTO Calc (GeomId,CalcId,Dir,StartGId,Results) VALUES (?, ?, ?, ?, ?)",
            res)
        cur.executemany('DELETE FROM ExpCalc WHERE ExpId=? AND GeomId=? ',
                        [(exportId, geomId) for geomId in geomIds])

        cur.execute(
            "UPDATE Exports SET ImpDT=strftime('%H:%M:%S %d-%m-%Y', datetime('now', 'localtime')) WHERE Id=?",
            (exportId, ))

        cur.execute("SELECT count(*) FROM ExpCalc WHERE ExpId=?", (exportId, ))
        if cur.fetchone()[0] == 0:
            cur.execute("UPDATE Exports SET Status=1 WHERE Id=?", (exportId, ))
            logger.info('Export Id={} is now closed.'.format(exportId))
            if isDel:
                logger.info("Deleting export directory {}".format(exportDir))
                shutil.rmtree(exportDir)
        else:
            logger.info('Export Id={} is not closed.'.format(exportId))
        logger.info("{} Job(s) have been successfully imported.\n".format(
            len(geomIds)))
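
A hedged usage sketch with a hypothetical export.dat path; the directory name encodes the export id that the function looks up, and iGl is passed through to ImportCalc.

import logging

logger = logging.getLogger('PESMan')
ImportJobs('pesman.db', 1, 'ExpDir/Export1-mcscf1/export.dat', 'GeomData',
           [], isDel=False, isZipped=True, logger=logger)  # hypothetical db file, paths and flags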
Example #9
def GetExpGeomNearNbr(dB, calcId, gidList, sidList, jobs, maxDepth, constDb,
                      inclPath):
    # Get the exportable geometries and their start Ids for multi jobs; returns a list of (GeomId, startId) tuples
    with sqlConnect(dB) as con:
        cur = con.cursor()

        # get jobs that are already done and add them to the exclude geom list
        cur.execute("SELECT GeomId,Id FROM Calc WHERE CalcId=?", (calcId, ))
        DictCalcId = dict(cur.fetchall())
        CalcGeomIds = set(DictCalcId.keys())
        ExcludeGeomIds = CalcGeomIds.copy()

        cur.execute("SELECT GeomId FROM ExpCalc WHERE CalcId=?", (calcId, ))
        ExcludeGeomIds.update((i for (i, ) in cur))

        if gidList:
            sidList += [-1] * (len(gidList) - len(sidList)
                               )  # fill sidList if some missing
            DictStartId = dict(zip(gidList,
                                   sidList))  # create a dict of start ids

        cond = []
        if gidList:
            cond.append('id in (' + ",".join(map(str, gidList)) +
                        ')')  #include gidlist
        if constDb: cond.append('(' + constDb + ')')  #include constraint
        if not inclPath:
            cond.append("( tags NOT LIKE '%path%' or tags is null)"
                        )  #exclude pathological
        # `tags is null` is added as `not like '%path%'` doesn't match `null`!!! Is this the correct way?
        cond = ' where ' + ' and '.join(cond) if cond else ''
        cur.execute('SELECT Id,Nbr FROM Geometry' + cond)

        expGClist = []  # list that collects the tuple exportable jobs info
        fullGeomList = []  # a naive approach: store all the missed geometries
        for geomId, nbrList in cur:
            if geomId in ExcludeGeomIds:
                continue  # geometry already exist, skip
            if gidList and DictStartId[geomId] >= 0:  # negative start id goes to the main neighbour search
                nbrId = DictStartId[geomId]  # i.e. 0 or positive startid given
                if not nbrId:  # 0 startid nothing to do here
                    expGClist.append((geomId, 0))
                elif nbrId in CalcGeomIds:  # positive start id, include if calculation is already done
                    expGClist.append((geomId, DictCalcId[nbrId]))
                if len(expGClist) == jobs:
                    return expGClist  # got all the geometries needed
                continue

            nbrList = list(map(int, nbrList.split()))  # careful about the integer mapping
            nbrId = nbrList[
                0]  # for this initial loop only consider first neighbour

            if nbrId in CalcGeomIds:
                expGClist.append((geomId, DictCalcId[nbrId]))  # got one match
                if len(expGClist) == jobs:
                    return expGClist
                continue
            fullGeomList.append((geomId, nbrList))

        # Get allowed depth, assuming every nbr list has the same number of elements. This is quite bad, do something better
        depth = maxDepth if maxDepth else len(
            fullGeomList[0][1]) if fullGeomList else 0
        exportedGeom = set([])
        for d in range(1, depth):  # depth loop starting from 1 to end,
            for geomId, nbrList in fullGeomList:
                if geomId in exportedGeom: continue

                nbrId = nbrList[d]  # get d-th neighbour
                if nbrId in CalcGeomIds:
                    expGClist.append(
                        (geomId, DictCalcId[nbrId])
                    )  # got one match now don't search for any other neighbours
                    if len(expGClist) == jobs:
                        return expGClist
                    exportedGeom.add(geomId)

    assert expGClist, "No Exportable geometries found"  # preventing null exports
    return expGClist
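
A hedged usage sketch with hypothetical arguments: ask for up to 10 exportable geometries for CalcId 1, with no explicit geom/start id lists, full neighbour depth, no extra constraint, and pathological geometries excluded.

expGClist = GetExpGeomNearNbr('pesman.db', 1, [], [], 10, None, '', False)
# returns a list of (GeomId, Calc-table-Id-of-the-neighbour) tuples used to seed the export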
Example #10
import os
import re
import sys

import numpy as np
from sqlite3 import connect as sqlConnect
from PESMan import parseConfig
from ImpExp import parseResult



# a list of export.dat files is provided through the command line arguments
# if no argument is provided then only the result is saved from the database
# Run this as `python <this_filename> <list of export.dat files>`


config = parseConfig()
dB = config['DataBase']['db']

with sqlConnect(dB) as con:
    cur = con.cursor()
    # create a new table that will hold the semi successful jobs, no other information is required at this moment
    cur.executescript('''
    CREATE TABLE IF NOT EXISTS SemiCalc(
    GeomId INTEGER NOT NULL,
    CalcId INTEGER NOT NULL,
    Results TEXT);
    ''')

    result = []
    for expFile in sys.argv[1:]: # multiple export directories are allowed through the argument list

        exportDir = os.path.abspath(os.path.dirname(expFile))   # get the main export directory
        exportId  = re.findall(r'Export(\d+)-', exportDir)[0]     # get the export id from the directory name
        calcId    = re.findall(r'Export\d+-\w+(\d+)',exportDir)[0]
Example #11
    config = parseConfig()
    dbFile = config['DataBase']['db']
    nbrDbFile = config['DataBase']['nbr']

    dbExist = os.path.exists(dbFile)
    # neighbour limit to search for
    lim = 30

    # #%%%%%%%%%%%%%%%%%%%%%%%%%% for scattering hyperspherical %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    sql_script = sql_script.replace('$$', 'rho REAL,\ntheta REAL,\nphi REAL,')
    rho = float(sys.argv[1])
    ranges = [4.5, np.deg2rad(30), np.deg2rad(30)]

    if os.path.exists(nbrDbFile): os.remove(nbrDbFile)

    with sqlConnect(dbFile) as con, sqlConnect(nbrDbFile) as conNbr:
        cur = con.cursor()
        if not dbExist: cur.executescript(sql_script)

        curNbr = conNbr.cursor()
        curNbr.executescript(sql_nbrtable_commands)

        # create the geometry list here
        newGeomList = np.vstack([[rho, 0, 0],
                                 np.stack(np.mgrid[rho:rho:1j, 2:50:25j,
                                                   0:180:181j],
                                          axis=3).reshape(-1, 3),
                                 np.stack(np.mgrid[rho:rho:1j, 51:90:40j,
                                                   0:180:181j],
                                          axis=3).reshape(-1, 3)])