def renameFilesVersion(dset, dsetVersion, pathlist, session, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, keepVersion=False, newVersion=None, extraFields=None, **context):

    info("Renaming files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    # Get the list of FileVersion objects for this version
    locdict = {}
    todelete = {}
    for fobj in dsetVersion.getFileVersions():
        loc = fobj.location
        locdict[loc] = todelete[loc] = fobj

    basedict = dset.getBaseDictionary()

    nfiles = len(pathlist)
    varlocate = configOptions['variable_locate']

    seq = 0
    for path, size in pathlist:

        # If the file exists, rename it
        oldpath = None
        if extraFields is not None:
            oldpath = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if oldpath is None:
            info("No from_file field for file %s, skipping"%path)
            continue

        if oldpath in locdict:
            fileVersionObj = locdict[oldpath]
            fileObj = fileVersionObj.parent
            if not os.path.exists(path):
                info("File not found: %s, skipping"%path)
                continue
            info("Renaming %s to %s"%(oldpath, path))
            # Regenerate the file base name for the new path
            del basedict[fileObj.base]
            base = generateFileBase(path, basedict, dset.name)
            fileObj.base = base
            basedict[base] = 1
            fileVersionObj.location = path
            del locdict[oldpath]
            locdict[path] = fileVersionObj
        else:
            info("File entry %s not found, skipping"%oldpath)
            continue

        # Callback progress
        seq += 1
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    # Renaming never creates a new dataset version
    return False
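# ---------------------------------------------------------------------------
# Aside: the rename bookkeeping above hinges on basedict, the dataset-wide
# dict of file 'base' names already in use. generateFileBase itself is
# defined elsewhere in the publisher; the helper below is a hypothetical,
# minimal sketch of that uniquing pattern, included only to illustrate why
# the old base is removed from basedict before a new one is generated.
def _generateFileBaseSketch(path, basedict):
    base = os.path.basename(path)
    if base not in basedict:
        return base
    # On a clash, suffix a counter until the name is unique within basedict
    n = 1
    while "%s_%d"%(base, n) in basedict:
        n += 1
    return "%s_%d"%(base, n)
# Example: with basedict = {'tas.nc': 1}, _generateFileBaseSketch('/data/tas.nc', basedict)
# returns 'tas.nc_1'; the caller then records basedict['tas.nc_1'] = 1.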
def updateDatasetVersion(dset, dsetVersion, pathlist, session, handler, cfHandler, configOptions, aggregateDimensionName=None, offline=False, progressCallback=None, stopEvent=None, extraFields=None, replace=False, forceRescan=False, useVersion=-1, **context):

    if replace:
        info("Replacing files in dataset: %s, version %d"%(dset.name, dsetVersion.version))
    else:
        info("Updating files in dataset: %s, version %d"%(dset.name, dsetVersion.version))

    haveLatestDsetVersion = (dsetVersion.version == dset.getVersion())

    # Get the list of FileVersion objects for this version
    locdict = {}
    todelete = {}
    for fobj in dsetVersion.getFileVersions():
        loc = fobj.location
        locdict[loc] = todelete[loc] = fobj

    varlocate = configOptions['variable_locate']
    checksumClient = configOptions['checksumClient']
    checksumType = configOptions['checksumType']
    exclude_variables = configOptions['exclude_variables']
    perVariable = configOptions['perVariable']

    # Get the base dictionary for the entire dataset
    basedict = dset.getBaseDictionary()

    # For each item in the pathlist:
    seq = 0
    fileModified = False        # Any file has been modified (added, replaced, or deleted)
    newFileVersionObjs = []
    nfiles = len(pathlist)
    for path, sizet in pathlist:

        # Rescan this file if it has been added or replaced
        rescanFile = haveLatestDsetVersion

        size, mtime = sizet
        csum = None
        csumtype = checksumType
        techNotes = None
        techNotesTitle = None
        datasetTechNotes = None
        datasetTechNotesTitle = None
        if extraFields is not None:
            if useVersion != -1:
                csum = extraFields.get((dset.name, useVersion, path, 'checksum'), None)
                csumtype = extraFields.get((dset.name, useVersion, path, 'checksum_type'), None)
            else:
                csum = extraFieldsGet(extraFields, (dset.name, path, 'checksum'), dsetVersion)
                csumtype = extraFieldsGet(extraFields, (dset.name, path, 'checksum_type'), dsetVersion)
            techNotes = extraFields.get((dset.name, useVersion, path, 'tech_notes'), None)
            techNotesTitle = extraFields.get((dset.name, useVersion, path, 'tech_notes_title'), None)
            datasetTechNotes = extraFields.get((dset.name, useVersion, path, 'dataset_tech_notes'), None)
            datasetTechNotesTitle = extraFields.get((dset.name, useVersion, path, 'dataset_tech_notes_title'), None)
        if csum is None and not offline and checksumClient is not None:
            csum = checksum(path, checksumClient)
            csumtype = checksumType

        # Cache the dataset tech notes info for later use
        if datasetTechNotes is not None:
            dset.dataset_tech_notes = datasetTechNotes
            dset.dataset_tech_notes_title = datasetTechNotesTitle

        # Check if 'from_file' was specified for this file
        fromfile = None
        if extraFields is not None:
            fromfile = extraFieldsGet(extraFields, (dset.name, path, 'from_file'), dsetVersion)
        if fromfile is None:
            oldpath = path
        else:
            frombase = os.path.basename(fromfile)
            tobase = os.path.basename(path)
            if frombase != tobase:
                info("Basenames are different for files: %s and %s. Ignoring 'from_file' option."%(path, fromfile))
                oldpath = path
            else:
                oldpath = fromfile

        # If the item is in the current dataset version, get the file version obj and add to the list
        if oldpath in locdict:
            del todelete[oldpath]
            fileVersionObj = locdict[oldpath]
            fileObj = fileVersionObj.parent

            # If the file matches the existing file version, no-op, ...
            if os.path.exists(oldpath) and compareFiles(fileVersionObj, handler, path, size, offline, checksum=csum):
                if not forceRescan:
                    info("File %s exists, skipping"%path)
                newFileVersionObjs.append(fileVersionObj)
                rescanFile = False

            # ... else create a new version of the file
            else:
                if oldpath != path:
                    info("Replacing file %s"%oldpath)
                newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
                newFileVersionObjs.append(newFileVersionObj)
                fileObj.deleteChildren(session)
                fileModified = True

        # Else create a new file / file version object and add to the list ...
        else:
            fileObj = FileFactory(dset, path, basedict, session)
            newFileVersionObj = FileVersionFactory(fileObj, path, session, size, mod_time=mtime, checksum=csum, checksum_type=csumtype, tech_notes=techNotes, tech_notes_title=techNotesTitle)
            newFileVersionObjs.append(newFileVersionObj)
            fileModified = True

        # ... and rescan if necessary
        if rescanFile or forceRescan:
            if not offline:
                info("Scanning %s"%path)
                f = handler.openPath(path)
                extractFromFile(dset, f, fileObj, session, handler, cfHandler, aggdimName=aggregateDimensionName, varlocate=varlocate, exclude_variables=exclude_variables, perVariable=perVariable, **context)
                f.close()
            else:
                info("File %s is offline"%path)

        # Callback progress
        seq += 1
        try:
            issueCallback(progressCallback, seq, nfiles, 0, 1, stopEvent=stopEvent)
        except:
            session.rollback()
            session.close()
            raise

    # If updating, add the file version objects ...
    if not replace:
        for fileVersionObj in todelete.values():
            newFileVersionObjs.append(fileVersionObj)

    # ... else if rescanning delete the file object children
    elif haveLatestDsetVersion:
        for fileVersionObj in todelete.values():
            fileObj = fileVersionObj.parent
            fileObj.deleteChildren(session)
            fileModified = True

    # Create a new dataset version if:
    # - a file has been added, replaced, or deleted, and
    # - the current version is the latest
    createNewDatasetVersion = haveLatestDsetVersion and fileModified

    return createNewDatasetVersion, newFileVersionObjs
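# ---------------------------------------------------------------------------
# Aside: the per-file metadata lookups above go through extraFields, a flat
# dict keyed by (dataset_name, version, path, field) tuples, where version -1
# means 'not tied to a specific dataset version'. extraFieldsGet is defined
# elsewhere in the publisher; the function below is a plausible sketch of the
# lookup convention, assuming it prefers the entry for the current dataset
# version and falls back to the version-independent (-1) entry.
def _extraFieldsGetSketch(extraDict, gettuple, dsetVersion):
    dsetName, path, field = gettuple
    # Try the entry for this specific dataset version first ...
    result = extraDict.get((dsetName, dsetVersion.version, path, field), None)
    if result is None:
        # ... then fall back to the version-independent entry
        result = extraDict.get((dsetName, -1, path, field), None)
    return result
# Note for callers of updateDatasetVersion: pathlist pairs each path with a
# (size, mtime) tuple, and the function returns (createNewDatasetVersion,
# newFileVersionObjs) so the caller can decide whether to create a new
# dataset version.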