print '(R)emove incomplete file sets / (s)kip?' response = sys.stdin.readline().strip() if response == 'R' or response == 's' or response == 'l': break else: response = 's' if response == 'R': while True: try: suffix = incompletes.pop() except IndexError: break for file in filter(lambda s: suffix in s, allFiles): print 'dsrm', path + '/' + file rmlink(path + '/' + file) resolved = [] for iD in range(len(duplicates)): if suffix in duplicates[iD]: duplicates[iD].remove(suffix) if len(duplicates[iD]) < 2: resolved.append(duplicates[iD]) for suffices in resolved: duplicates.remove(suffices) if len(duplicates): if not options.output: while True: print '(R)emove duplicates individually / remove (All) duplicates / (s)kip / (l)ist?'
def check_dataset(path, names, exclude = [], removeEmpty = False):
    """Scan a dataset directory and classify its per-job output files.

    Every entry of ``names`` (e.g. ``'ntuples.root'``) is turned into a
    pattern ``<stem>_<job>[_<n>_<xxx>].<ext>``, where ``<job>`` is the integer
    job number and the whole ``<job>[_<n>_<xxx>]`` part is the file "suffix".
    A suffix is complete when a file exists for every pattern that matched at
    least one file in the directory.

    Parameters:
        path: directory to scan.
        names: base file names, one pattern is built from each.
        exclude: job numbers to ignore. Only read, never modified.
        removeEmpty: if true, each matching entry is resolved with
            ``os.readlink``; when the target is missing or has size zero the
            entry is removed with ``rmlink`` and not counted.

    Returns:
        A tuple ``(nJobs, incompletes, duplicates, absents)``:
        ``nJobs`` is the number of distinct job numbers found,
        ``incompletes`` is a list of suffixes lacking a file for some pattern,
        ``duplicates`` is a list of lists of suffixes sharing a job number,
        ``absents`` is the sorted list of job numbers missing from
        ``1..max(job number)`` (empty when no job was found).
    """
    patterns = []
    for name in names:
        dot = name.rfind('.')
        patterns.append(re.compile(name[0:dot] + '_(([0-9]+)(?:_[0-9]+_[a-zA-Z0-9]{3}|))[.]' + name[dot + 1:]))

    # {jobNumber: {suffix-1: [pat1exists, pat2exists, ..], suffix-2: [pat1exists, pat2exists, ..]}}
    lists = {}

    for fname in os.listdir(path):
        for iP, pattern in enumerate(patterns):
            matches = pattern.match(fname)
            if not matches:
                continue

            jobNumber = int(matches.group(2))
            if jobNumber in exclude:
                break

            if removeEmpty:
                pfn = os.readlink(path + '/' + fname)
                try:
                    size = os.stat(pfn).st_size
                except OSError:
                    # dangling link: treat like an empty file
                    size = 0

                if size == 0:
                    print('dsrm ' + path + '/' + fname)
                    rmlink(path + '/' + fname)
                    break

            suffix = matches.group(1)
            existence = lists.setdefault(jobNumber, {}).setdefault(suffix, [False] * len(patterns))
            existence[iP] = True
            break
        else:
            # single-argument print gives the same text as a statement or a function
            print('File ' + fname + ' does not match any given patterns')

    # Drop patterns that matched no file at all, so that a name which is simply
    # not produced by this dataset does not flag every suffix as incomplete.
    iP = 0
    while iP < len(patterns):
        if any(existence[iP] for combinations in lists.values() for existence in combinations.values()):
            iP += 1
            continue

        for combinations in lists.values():
            for existence in combinations.values():
                existence.pop(iP)
        patterns.pop(iP)

    incompletes = [] # list of suffixes
    duplicates = [] # list of lists of suffixes
    for combinations in lists.values():
        for suffix, existence in combinations.items():
            if not all(existence):
                incompletes.append(suffix)

        if len(combinations) > 1:
            # real list: callers remove resolved suffixes from it in place
            duplicates.append(list(combinations.keys()))

    # max() of an empty sequence raises ValueError; nothing found means nothing absent
    if lists:
        absents = sorted(set(range(1, max(lists) + 1)) - set(lists))
    else:
        absents = []

    return len(lists), incompletes, duplicates, absents
def _ensure_directory(dirpath, logfunc):
    """Create dirpath (with parents) if missing; log and return False if it exists but is not a directory."""
    if os.path.exists(dirpath):
        if not os.path.isdir(dirpath):
            logfunc('Cannot make', dirpath, 'into a directory')
            return False
    else:
        os.makedirs(dirpath)

    return True


def dscp(source, lfn, logfunc = print, force = False):
    """Copy a file onto a randomly chosen disk and expose it through an LFN symlink.

    The physical file name (PFN) is the LFN with its leading ``/store``
    replaced by an entry picked at random from ``disks``. After a successful
    copy, ``lfn`` is created as a symlink pointing at the PFN.

    Parameters:
        source: local path, or an scp-style ``host:path`` (detected by the
            presence of ``:``), of the file to copy.
        lfn: logical file name; must start with ``/store/``.
        logfunc: print-like callable used for all progress and error messages.
        force: if true, an existing symlink at ``lfn`` is removed first;
            otherwise an existing symlink makes the call fail.

    Returns:
        True on success, False on any handled failure (a message is logged).
    """
    logfunc('dscp', source, lfn)

    # Validate before touching anything, so a bad LFN never costs an existing link.
    if not lfn.startswith('/store/'):
        logfunc('LFN must start with /store/')
        return False

    if os.path.islink(lfn):
        if not force:
            logfunc(lfn, 'already exists as LFN')
            return False

        try:
            dsrm.rmlink(lfn)
        except Exception:
            logfunc('Failed to remove', lfn)
            return False

    if not _ensure_directory(os.path.dirname(lfn), logfunc):
        return False

    target = random.choice(disks)
    # Swap only the leading /store; str.replace would also rewrite any later
    # '/store' component of the path.
    pfn = target + lfn[len('/store'):]

    if not _ensure_directory(os.path.dirname(pfn), logfunc):
        return False

    if ':' in source:
        proc = subprocess.Popen(['scp', '-oStrictHostKeyChecking=no', source, pfn],
                                stdout = subprocess.PIPE, stderr = subprocess.STDOUT,
                                universal_newlines = True)
        # communicate() drains the pipe while waiting: no busy loop, and no
        # deadlock when scp writes more than the pipe buffer holds.
        response = proc.communicate()[0]
        if proc.returncode != 0:
            logfunc('Copy failed:', response)
            return False
    else:
        try:
            shutil.copyfile(source, pfn)
        except Exception:
            logfunc('Copy failed:\n', excDump())
            return False

    os.symlink(pfn, lfn)

    return True