Пример #1
0
def renameFilesVersion(dset, dsetVersion, pathlist, session, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, keepVersion=False, newVersion=None, extraFields=None, **context):
    """Point existing file versions of a dataset version at their new paths.

    For every ``(path, size)`` entry of ``pathlist`` the 'from_file' extra
    field is looked up. When a file version with that old location exists in
    ``dsetVersion`` and ``path`` exists on disk, the parent file's base is
    regenerated from the new path and the file version's location is set to
    ``path``. Entries that cannot be renamed are logged and skipped.

    Parameters:
      dset: dataset object; ``name`` and ``getBaseDictionary()`` are used.
      dsetVersion: dataset version; ``version`` and ``getFileVersions()``
        are used.
      pathlist: sized iterable of ``(path, size)`` pairs; ``size`` is ignored.
      session: rolled back and closed if the progress callback raises.
      progressCallback, stopEvent: forwarded to ``issueCallback``.
      extraFields: forwarded to ``extraFieldsGet`` to find 'from_file';
        when None every entry is skipped.
      cfHandler, configOptions, aggregateDimensionName, offline, keepVersion,
      newVersion, context: not read by this function.

    Returns:
      Always False.

    Raises:
      Whatever ``issueCallback`` raises, re-raised after the session has been
      rolled back and closed.
    """

    info("Renaming files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    # Index the FileVersion objects of this version by their current location
    locdict = {}
    for fobj in dsetVersion.getFileVersions():
        locdict[fobj.location] = fobj

    basedict = dset.getBaseDictionary()

    nfiles = len(pathlist)

    seq = 0
    for path, _size in pathlist:

        # The old location comes from the 'from_file' extra field
        oldpath = None
        if extraFields is not None:
            oldpath = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if oldpath is None:
            info("No from_file field for file %s, skipping"%path)
            continue

        if oldpath not in locdict:
            info("File entry %s not found, skipping"%oldpath)
            continue

        fileVersionObj = locdict[oldpath]
        fileObj = fileVersionObj.parent
        if not os.path.exists(path):
            info("File not found: %s, skipping"%path)
            continue

        info("Renaming %s to %s"%(oldpath, path))

        # Drop the old base before generating the new one, so the dictionary
        # handed to generateFileBase no longer contains this file's own entry
        del basedict[fileObj.base]
        base = generateFileBase(path, basedict, dset.name)
        fileObj.base = base
        basedict[base] = 1

        # Re-key the file version under its new location
        fileVersionObj.location = path
        del locdict[oldpath]
        locdict[path] = fileVersionObj

        # Only files that were actually renamed count toward progress
        seq += 1

        # Callback progress; the bare except is deliberate, since the
        # exception is always re-raised after the session is cleaned up
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    return False
Пример #2
0
def renameFilesVersion(dset, dsetVersion, pathlist, session, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, keepVersion=False, newVersion=None, extraFields=None, **context):
    """Rename the files of a dataset version according to 'from_file' fields.

    Each ``(path, size)`` pair in ``pathlist`` names a new location. The
    previous location is read from the 'from_file' extra field; if a file
    version of ``dsetVersion`` is registered at that previous location and
    ``path`` exists on disk, the parent file gets a freshly generated base
    and the file version's location becomes ``path``. Anything else is
    logged and skipped.

    Parameters:
      dset: dataset object; ``name`` and ``getBaseDictionary()`` are used.
      dsetVersion: dataset version; ``version`` and ``getFileVersions()``
        are used.
      pathlist: sized iterable of ``(path, size)`` pairs; ``size`` is ignored.
      session: rolled back and closed if the progress callback raises.
      progressCallback, stopEvent: forwarded to ``issueCallback``.
      extraFields: forwarded to ``extraFieldsGet`` to find 'from_file';
        when None every entry is skipped.
      cfHandler, configOptions, aggregateDimensionName, offline, keepVersion,
      newVersion, context: not read by this function.

    Returns:
      Always False.

    Raises:
      Whatever ``issueCallback`` raises, re-raised after the session has been
      rolled back and closed.
    """

    info("Renaming files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    # Current location -> FileVersion object, for this dataset version
    locdict = {}
    for fobj in dsetVersion.getFileVersions():
        locdict[fobj.location] = fobj

    basedict = dset.getBaseDictionary()

    nfiles = len(pathlist)

    seq = 0
    for path, _size in pathlist:

        # Find where the file used to live
        oldpath = None
        if extraFields is not None:
            oldpath = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if oldpath is None:
            info("No from_file field for file %s, skipping"%path)
            continue

        if oldpath not in locdict:
            info("File entry %s not found, skipping"%oldpath)
            continue

        fileVersionObj = locdict[oldpath]
        fileObj = fileVersionObj.parent
        if not os.path.exists(path):
            info("File not found: %s, skipping"%path)
            continue

        info("Renaming %s to %s"%(oldpath, path))

        # The old base is removed first so that generateFileBase sees a
        # dictionary without this file's own previous entry
        del basedict[fileObj.base]
        base = generateFileBase(path, basedict, dset.name)
        fileObj.base = base
        basedict[base] = 1

        # Move the lookup entry from the old location to the new one
        fileVersionObj.location = path
        del locdict[oldpath]
        locdict[path] = fileVersionObj

        # Skipped entries never reach this point, so seq counts renames only
        seq += 1

        # Callback progress; the exception is always re-raised, the handler
        # only makes sure the session is cleaned up first
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    return False
Пример #3
0
def updateDatasetVersion(dset, dsetVersion, pathlist, session, handler, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, extraFields=None, replace=False, forceRescan=False, **context):
    """Update or replace the files of a dataset version from a path list.

    Each ``(path, (size, mtime))`` entry is matched against the file versions
    of ``dsetVersion`` by location (or by its 'from_file' extra field when the
    basenames agree). Unchanged files are kept, changed files get a new file
    version, and unknown paths get a new file plus file version. Files are
    rescanned with ``extractFromFile`` when they were added or replaced in
    the latest dataset version, or whenever ``forceRescan`` is set.

    Parameters:
      dset: dataset object; ``name``, ``getVersion()`` and
        ``getBaseDictionary()`` are used, and the ``dataset_tech_notes`` /
        ``dataset_tech_notes_title`` attributes may be set.
      dsetVersion: dataset version; ``version`` and ``getFileVersions()``
        are used.
      pathlist: sized iterable of ``(path, (size, mtime))`` entries.
      session: passed to the factories and ``deleteChildren``; rolled back
        and closed if the progress callback raises.
      handler: provides ``openPath(path)`` and is passed to ``compareFiles``.
      cfHandler: passed to ``extractFromFile``.
      configOptions: mapping that must contain 'variable_locate',
        'checksumClient' and 'checksumType'.
      aggregateDimensionName: passed to ``extractFromFile`` as ``aggdimName``.
      offline: when true, no checksum is computed and no file is scanned.
      progressCallback, stopEvent: forwarded to ``issueCallback``.
      extraFields: optional per-file extra fields (checksum, checksum_type,
        tech notes, from_file).
      replace: when false, file versions not named in ``pathlist`` are kept;
        when true they are left out of the result.
      forceRescan: rescan every online file, even if unchanged.
      context: extra keyword arguments forwarded to ``extractFromFile``.

    Returns:
      A tuple ``(createNewDatasetVersion, newFileVersionObjs)``. The flag is
      true only when ``dsetVersion`` is the latest version and at least one
      file was added, replaced or (in replace mode) dropped.
    """

    if replace:
        info("Replacing files in dataset: %s, version %d"%(dset.name, dsetVersion.version))
    else:
        info("Updating files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    haveLatestDsetVersion = (dsetVersion.version == dset.getVersion())

    # Get the list of FileVersion objects for this version. Entries are
    # removed from todelete as they are matched, so what remains afterwards
    # are the file versions not mentioned in pathlist.
    locdict = {}
    todelete = {}
    for fobj in dsetVersion.getFileVersions():
        loc = fobj.location
        locdict[loc] = todelete[loc] = fobj

    varlocate = configOptions['variable_locate']
    checksumClient = configOptions['checksumClient']
    checksumType = configOptions['checksumType']

    # Get the base dictionary for the entire dataset
    basedict = dset.getBaseDictionary()

    # For each item in the pathlist:
    seq = 0
    fileModified = False                # Any file has been modified (added, replaced, or deleted)
    newFileVersionObjs = []
    nfiles = len(pathlist)
    for path, sizet in pathlist:

        # Rescan this file if it has been added, or replaced
        rescanFile = haveLatestDsetVersion

        size, mtime = sizet
        csum = None
        csumtype = checksumType
        techNotes = None
        techNotesTitle = None
        datasetTechNotes = None
        datasetTechNotesTitle = None
        if extraFields is not None:
            csum = extraFieldsGet(extraFields, (dset.name, path, 'checksum'), dsetVersion)
            # NOTE(review): this overwrites the configured default; if the
            # lookup yields None while a checksum is supplied, csumtype stays
            # None -- confirm that is intended.
            csumtype = extraFieldsGet(extraFields, (dset.name, path, 'checksum_type'), dsetVersion)
            techNotes = extraFields.get((dset.name, -1, path, 'tech_notes'), None)
            techNotesTitle = extraFields.get((dset.name, -1, path, 'tech_notes_title'), None)
            datasetTechNotes = extraFields.get((dset.name, -1, path, 'dataset_tech_notes'), None)
            datasetTechNotesTitle = extraFields.get((dset.name, -1, path, 'dataset_tech_notes_title'), None)
        if csum is None and not offline and checksumClient is not None:
            csum = checksum(path, checksumClient)
            csumtype = checksumType

        # Cache the dataset tech notes info for later use
        if datasetTechNotes is not None:
            dset.dataset_tech_notes = datasetTechNotes
            dset.dataset_tech_notes_title = datasetTechNotesTitle

        # Check if 'from_file' was specified for this file
        fromfile = None
        if extraFields is not None:
            fromfile = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if fromfile is None:
            oldpath = path
        else:
            frombase = os.path.basename(fromfile)
            tobase = os.path.basename(path)
            if frombase != tobase:
                info("Basenames are different for files: %s and %s. Ignoring 'from_file' option."%(path, fromfile))
                oldpath = path
            else:
                oldpath = fromfile

        # If the item is in the current dataset version, get the file version obj and add to the list
        if oldpath in locdict:
            del todelete[oldpath]
            fileVersionObj = locdict[oldpath]
            fileObj = fileVersionObj.parent

            # If the file matches the existing file version, no-op, ...
            if os.path.exists(oldpath) and compareFiles(fileVersionObj, handler, path, size, offline, checksum=csum):
                if not forceRescan:
                    info("File %s exists, skipping"%path)
                newFileVersionObjs.append(fileVersionObj)
                rescanFile = False

            # ... else create a new version of the file
            else:
                if oldpath != path:
                    info("Replacing file %s"%oldpath)
                newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
                newFileVersionObjs.append(newFileVersionObj)
                fileObj.deleteChildren(session)
                fileModified = True

        # Else create a new file / file version object and add to the list ...
        else:
            fileObj = FileFactory(dset, path, basedict, session)
            newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
            newFileVersionObjs.append(newFileVersionObj)
            fileModified = True

        # ... and rescan if necessary
        if rescanFile or forceRescan:
            if not offline:
                info("Scanning %s"%path)
                f = handler.openPath(path)
                # Close the file even when extraction fails
                try:
                    extractFromFile(dset, f, fileObj, session, cfHandler, aggdimName=aggregateDimensionName, varlocate=varlocate, **context)
                finally:
                    f.close()
            else:
                info("File %s is offline"%path)

        # Callback progress; the exception is always re-raised, the handler
        # only makes sure the session is cleaned up first
        seq += 1
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    # If updating, add the file version objects ...
    if not replace:
        newFileVersionObjs.extend(todelete.values())

    # ... else if rescanning delete the file object children
    elif haveLatestDsetVersion:
        for fileVersionObj in todelete.values():
            fileObj = fileVersionObj.parent
            fileObj.deleteChildren(session)
            fileModified = True

    # Create a new dataset version if:
    # - a file has been added, replaced, or deleted, and
    # - the current version is the latest
    createNewDatasetVersion = haveLatestDsetVersion and fileModified

    return createNewDatasetVersion, newFileVersionObjs
Пример #4
0
def updateDatasetVersion(dset, dsetVersion, pathlist, session, handler, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, extraFields=None, replace=False, forceRescan=False, useVersion=-1, **context):
    """Update or replace the files of a dataset version from a path list.

    Each ``(path, (size, mtime))`` entry is matched against the file versions
    of ``dsetVersion`` by location (or by its 'from_file' extra field when the
    basenames agree). Unchanged files are kept, changed files get a new file
    version, and unknown paths get a new file plus file version. Files are
    rescanned with ``extractFromFile`` when they were added or replaced in
    the latest dataset version, or whenever ``forceRescan`` is set.

    Parameters:
      dset: dataset object; ``name``, ``getVersion()`` and
        ``getBaseDictionary()`` are used, and the ``dataset_tech_notes`` /
        ``dataset_tech_notes_title`` attributes may be set.
      dsetVersion: dataset version; ``version`` and ``getFileVersions()``
        are used.
      pathlist: sized iterable of ``(path, (size, mtime))`` entries.
      session: passed to the factories and ``deleteChildren``; rolled back
        and closed if the progress callback raises.
      handler: provides ``openPath(path)`` and is passed to ``compareFiles``
        and ``extractFromFile``.
      cfHandler: passed to ``extractFromFile``.
      configOptions: mapping that must contain 'variable_locate',
        'checksumClient', 'checksumType', 'exclude_variables' and
        'perVariable'.
      aggregateDimensionName: passed to ``extractFromFile`` as ``aggdimName``.
      offline: when true, no checksum is computed and no file is scanned.
      progressCallback, stopEvent: forwarded to ``issueCallback``.
      extraFields: optional per-file extra fields (checksum, checksum_type,
        tech notes, from_file).
      replace: when false, file versions not named in ``pathlist`` are kept;
        when true they are left out of the result.
      forceRescan: rescan every online file, even if unchanged.
      useVersion: version component of the ``extraFields`` keys. With the
        default -1 the checksum fields go through ``extraFieldsGet``;
        otherwise they are read directly under this version.
      context: extra keyword arguments forwarded to ``extractFromFile``.

    Returns:
      A tuple ``(createNewDatasetVersion, newFileVersionObjs)``. The flag is
      true only when ``dsetVersion`` is the latest version and at least one
      file was added, replaced or (in replace mode) dropped.
    """

    if replace:
        info("Replacing files in dataset: %s, version %d"%(dset.name, dsetVersion.version))
    else:
        info("Updating files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    haveLatestDsetVersion = (dsetVersion.version == dset.getVersion())

    # Get the list of FileVersion objects for this version. Entries are
    # removed from todelete as they are matched, so what remains afterwards
    # are the file versions not mentioned in pathlist.
    locdict = {}
    todelete = {}
    for fobj in dsetVersion.getFileVersions():
        loc = fobj.location
        locdict[loc] = todelete[loc] = fobj

    varlocate = configOptions['variable_locate']
    checksumClient = configOptions['checksumClient']
    checksumType = configOptions['checksumType']
    exclude_variables = configOptions['exclude_variables']
    perVariable = configOptions['perVariable']

    # Get the base dictionary for the entire dataset
    basedict = dset.getBaseDictionary()

    # For each item in the pathlist:
    seq = 0
    fileModified = False                # Any file has been modified (added, replaced, or deleted)
    newFileVersionObjs = []
    nfiles = len(pathlist)
    for path, sizet in pathlist:

        # Rescan this file if it has been added, or replaced
        rescanFile = haveLatestDsetVersion

        size, mtime = sizet
        csum = None
        csumtype = checksumType
        techNotes = None
        techNotesTitle = None
        datasetTechNotes = None
        datasetTechNotesTitle = None
        if extraFields is not None:
            # NOTE(review): both branches overwrite the configured default
            # csumtype; if the lookup yields None while a checksum is
            # supplied, csumtype stays None -- confirm that is intended.
            if useVersion != -1:
                csum = extraFields.get((dset.name, useVersion, path, 'checksum'), None)
                csumtype = extraFields.get((dset.name, useVersion, path, 'checksum_type'), None)
            else:
                csum = extraFieldsGet(extraFields, (dset.name, path, 'checksum'), dsetVersion)
                csumtype = extraFieldsGet(extraFields, (dset.name, path, 'checksum_type'), dsetVersion)
            techNotes = extraFields.get((dset.name, useVersion, path, 'tech_notes'), None)
            techNotesTitle = extraFields.get((dset.name, useVersion, path, 'tech_notes_title'), None)
            datasetTechNotes = extraFields.get((dset.name, useVersion, path, 'dataset_tech_notes'), None)
            datasetTechNotesTitle = extraFields.get((dset.name, useVersion, path, 'dataset_tech_notes_title'), None)
        if csum is None and not offline and checksumClient is not None:
            csum = checksum(path, checksumClient)
            csumtype = checksumType

        # Cache the dataset tech notes info for later use
        if datasetTechNotes is not None:
            dset.dataset_tech_notes = datasetTechNotes
            dset.dataset_tech_notes_title = datasetTechNotesTitle

        # Check if 'from_file' was specified for this file
        fromfile = None
        if extraFields is not None:
            fromfile = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if fromfile is None:
            oldpath = path
        else:
            frombase = os.path.basename(fromfile)
            tobase = os.path.basename(path)
            if frombase != tobase:
                info("Basenames are different for files: %s and %s. Ignoring 'from_file' option."%(path, fromfile))
                oldpath = path
            else:
                oldpath = fromfile

        # If the item is in the current dataset version, get the file version obj and add to the list
        if oldpath in locdict:
            del todelete[oldpath]
            fileVersionObj = locdict[oldpath]
            fileObj = fileVersionObj.parent

            # If the file matches the existing file version, no-op, ...
            if os.path.exists(oldpath) and compareFiles(fileVersionObj, handler, path, size, offline, checksum=csum):
                if not forceRescan:
                    info("File %s exists, skipping"%path)
                newFileVersionObjs.append(fileVersionObj)
                rescanFile = False

            # ... else create a new version of the file
            else:
                if oldpath != path:
                    info("Replacing file %s"%oldpath)
                newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
                newFileVersionObjs.append(newFileVersionObj)
                fileObj.deleteChildren(session)
                fileModified = True

        # Else create a new file / file version object and add to the list ...
        else:
            fileObj = FileFactory(dset, path, basedict, session)
            newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
            newFileVersionObjs.append(newFileVersionObj)
            fileModified = True

        # ... and rescan if necessary
        if rescanFile or forceRescan:
            if not offline:
                info("Scanning %s"%path)
                f = handler.openPath(path)
                # Close the file even when extraction fails
                try:
                    extractFromFile(dset, f, fileObj, session, handler, cfHandler, aggdimName=aggregateDimensionName, varlocate=varlocate, exclude_variables=exclude_variables, perVariable=perVariable, **context)
                finally:
                    f.close()
            else:
                info("File %s is offline"%path)

        # Callback progress; the exception is always re-raised, the handler
        # only makes sure the session is cleaned up first
        seq += 1
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    # If updating, add the file version objects ...
    if not replace:
        newFileVersionObjs.extend(todelete.values())

    # ... else if rescanning delete the file object children
    elif haveLatestDsetVersion:
        for fileVersionObj in todelete.values():
            fileObj = fileVersionObj.parent
            fileObj.deleteChildren(session)
            fileModified = True

    # Create a new dataset version if:
    # - a file has been added, replaced, or deleted, and
    # - the current version is the latest
    createNewDatasetVersion = haveLatestDsetVersion and fileModified

    return createNewDatasetVersion, newFileVersionObjs