def testLinkSymlink(self):
    """Hard-linking over an existing symlink must be a no-op."""
    target = "sha1link.txt"
    fsu.symlinkFile(self._sha1file, target)
    self.assertSymlink(target)
    # attempt to hard link over the existing symlink; nothing should change
    fsu.linkFile(self._sha1file, target)
    self.assertSymlink(target)
    self.assertUnlinked(target)
def testSymlinkFile(self):
    """symlinkFile creates a symlink and is safe to call again on the same target."""
    target = "sha1link.txt"
    # second pass verifies that re-symlinking an existing link won't fail
    for _ in range(2):
        fsu.symlinkFile(self._sha1file, target)
        self.assertSymlink(target)
    self.assertUnlinked(target)
def dedup(self, dupdir, doSymlink):
    """Move duplicate entries (based on checksum) into dupdir.

    Uses each entry's path to reconstruct a subdirectory hierarchy in
    dupdir. This will remove any common prefixes between dupdir and the
    file path itself so as to make a useful subdirectory structure.

    If doSymlink is true, the original paths of the files that were
    moved are symlinked back to the canonical file; in addition, their
    rows are kept in the database (marked symlink = 1) rather than
    removed.

    Raises Exception if dupdir exists and is non-empty; any error during
    de-duplication is logged and re-raised.
    """
    logging.info("De-duping database")
    if os.path.exists(dupdir) and len(os.listdir(dupdir)) > 0:
        raise Exception("%s is not empty; refusing to move files" % dupdir)
    try:
        pathmap = {}  # duplicate paths keyed by file checksum
        with sqliteConn(self.database) as cursor:
            # Non-symlinked, singly-linked files whose checksum occurs
            # more than once among non-symlinked files.
            cursor.execute("""select chksum, path, link from files
                where chksum in(
                    select chksum from files where symlink = 0
                    group by chksum having count(chksum) > 1)
                and symlink = 0 and link = 1 order by chksum, link;""")
            for chksum, path, islink in cursor:
                pathmap.setdefault(chksum, []).append(path)
            # NOTE: .items() instead of Python-2-only .iteritems()
            for chksum, paths in pathmap.items():
                # the query above will still yield rows for symlinked
                # files, so fix that here rather than mucking about with
                # temp tables (list, not filter(): py3 filter is one-shot)
                paths = [p for p in paths if not os.path.islink(p)]
                # filtering may have left fewer than two real files
                if len(paths) < 2:
                    continue
                # Keep the first path as the canonical copy and move the
                # rest. (Previously canonicalPath was never assigned,
                # raising NameError whenever doSymlink was true, and the
                # canonical copy itself was also moved away.)
                canonicalPath = paths[0]
                for path in paths[1:]:
                    dst = dstWithSubdirectory(path, dupdir)
                    # don't rm empty dirs if we are symlinking
                    moveFile(path, dst, (not doSymlink))
                    if not doSymlink:
                        cursor.execute(REMOVE_ROW, (path, ))
                    else:
                        cursor.execute(
                            "update files set symlink = 1 where path = ?;",
                            (path, ))
                        symlinkFile(canonicalPath, path)
        logging.info("De-duping complete")
    except Exception as einst:
        logging.error("Unable to de-dup database: %s" % einst)
        raise
def dedup(self, dupdir, doSymlink):
    """Move duplicate entries (based on checksum) into dupdir.

    Uses each entry's path to reconstruct a subdirectory hierarchy in
    dupdir. This will remove any common prefixes between dupdir and the
    file path itself so as to make a useful subdirectory structure.

    If doSymlink is true, the original paths of the files that were
    moved are symlinked back to the canonical file; in addition, their
    rows are kept in the database (marked symlink = 1) rather than
    removed.

    Raises Exception if dupdir exists and is non-empty; any error during
    de-duplication is logged and re-raised.
    """
    logging.info("De-duping database")
    if os.path.exists(dupdir) and len(os.listdir(dupdir)) > 0:
        raise Exception("%s is not empty; refusing to move files" % dupdir)
    try:
        pathmap = {}  # duplicate paths keyed by file checksum
        with sqliteConn(self.database) as cursor:
            # Non-symlinked, singly-linked files whose checksum occurs
            # more than once among non-symlinked files.
            cursor.execute("""select chksum, path, link from files
                where chksum in(
                    select chksum from files where symlink = 0
                    group by chksum having count(chksum) > 1)
                and symlink = 0 and link = 1 order by chksum, link;""")
            for chksum, path, islink in cursor:
                pathmap.setdefault(chksum, []).append(path)
            # NOTE: .items() instead of Python-2-only .iteritems()
            for chksum, paths in pathmap.items():
                # the query above will still yield rows for symlinked
                # files, so fix that here rather than mucking about with
                # temp tables (list, not filter(): py3 filter is one-shot)
                paths = [p for p in paths if not os.path.islink(p)]
                # filtering may have left fewer than two real files
                if len(paths) < 2:
                    continue
                # Keep the first path as the canonical copy and move the
                # rest. (Previously canonicalPath was never assigned,
                # raising NameError whenever doSymlink was true, and the
                # canonical copy itself was also moved away.)
                canonicalPath = paths[0]
                for path in paths[1:]:
                    dst = dstWithSubdirectory(path, dupdir)
                    # don't rm empty dirs if we are symlinking
                    moveFile(path, dst, (not doSymlink))
                    if not doSymlink:
                        cursor.execute(REMOVE_ROW, (path, ))
                    else:
                        cursor.execute(
                            "update files set symlink = 1 where path = ?;",
                            (path, ))
                        symlinkFile(canonicalPath, path)
        logging.info("De-duping complete")
    except Exception as einst:
        logging.error("Unable to de-dup database: %s" % einst)
        raise
def testSymlinkFileBad(self):
    """symlinkFile must raise OSError for empty or missing source/target paths."""
    badPairs = [
        (None, None),
        ("", ""),
        (None, ""),
        ("", None),
        (self._sha1file, ""),
        (self._sha1file, None),
    ]
    # assertRaises accepts the callable plus its args directly; no lambdas needed
    for src, dst in badPairs:
        self.assertRaises(OSError, fsu.symlinkFile, src, dst)