Пример #1
def _updateIndices(db: DB) -> None:
    "Add indices to the DB."
-- syncing
create index if not exists ix_notes_usn on notes (usn);
create index if not exists ix_cards_usn on cards (usn);
create index if not exists ix_revlog_usn on revlog (usn);
-- card spacing, etc
create index if not exists ix_cards_nid on cards (nid);
-- scheduling and deck limiting
create index if not exists ix_cards_sched on cards (did, queue, due);
-- revlog by card
create index if not exists ix_revlog_cid on revlog (cid);
-- field uniqueness
create index if not exists ix_notes_csum on notes (csum);
Пример #2
class MediaManager(object):

    # other code depends on this order, so don't reorder
    regexps = ("(?i)(\[sound:([^]]+)\])",

    def __init__(self, col):
        """Remember the collection and derive the media folder path.

        NOTE(review): this excerpt looks truncated - the ``if`` below has no
        body and nothing follows the "change database" comment.
        """
        self.col = col
        # media directory
        # (the collection path with a trailing ".anki2" replaced by ".media")
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        if not os.path.exists(self._dir):
        # working directory in effect when the manager was constructed
        self._oldcwd = os.getcwd()
        # change database

    def connect(self):
        """Open the media database that lives next to the media folder.

        The database path is the media directory path plus ".db"; ``create``
        records whether that file was absent before ``DB(path)`` was called.

        NOTE(review): the bodies of both ``if`` statements are missing from
        this excerpt.
        """
        if self.col.server:
        path = self.dir()+".db"
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def close(self):
        """Drop the reference to the media database.

        NOTE(review): the body of the ``if`` and whatever followed the
        "change cwd" comment are missing from this excerpt.
        """
        if self.col.server:
        self.db = None
        # change cwd back to old location

    def dir(self):
        "Return the path of the media directory."
        mediaDir = self._dir
        return mediaDir

    # Adding media

    def addFile(self, opath):
        """Copy the file at OPATH into the media folder and return its name.

        Brackets, angle brackets, colons, slashes, backslashes and ampersands
        are removed from the base name first.  If no file of that name exists
        yet, the file is copied as is.  If one exists and ``filesIdentical``
        reports the same contents, it is reused and nothing is copied.
        Otherwise a " (N)" suffix is appended to, or incremented on, the stem
        until an unused name is found.
        """
        mdir = self.dir()
        # remove any dangerous characters
        base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath))
        dst = os.path.join(mdir, base)
        # if it doesn't exist, copy it directly
        if not os.path.exists(dst):
            shutil.copy2(opath, dst)
            return base
        # if it's identical, reuse
        if self.filesIdentical(opath, dst):
            return base
        # otherwise, find a unique name
        (root, ext) = os.path.splitext(base)
        reg = r" \((\d+)\)$"

        def repl(match):
            return " (%d)" % (int(match.group(1)) + 1)

        while True:
            path = os.path.join(mdir, root + ext)
            if not os.path.exists(path):
                # free name found; without this exit the loop never ends
                break
            # the two branches are alternatives: either bump an existing
            # counter or start one, never both in the same pass
            if re.search(reg, root):
                root = re.sub(reg, repl, root)
            else:
                root = root + " (1)"
        # copy and return
        shutil.copy2(opath, path)
        return os.path.basename(path)

    def filesIdentical(self, path1, path2):
        "True if files are the same (compared by checksum of their contents)."
        # Use context managers so both handles are closed as soon as the
        # contents have been read instead of lingering until collection.
        with open(path1, "rb") as first:
            sum1 = checksum(first.read())
        with open(path2, "rb") as second:
            sum2 = checksum(second.read())
        return sum1 == sum2

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        """Return the list of media filenames referenced in STRING.

        mid -- id of the model handed to mungeQA together with the string
        string -- the text to scan
        includeRemote -- also return references starting with http://,
                         https:// or ftp://
        """
        l = []
        # convert latex first
        model = self.col.models.get(mid)
        string = mungeQA(string, None, None, model, None, self.col)
        # extract filenames
        for reg in self.regexps:
            for (full, fname) in re.findall(reg, string):
                isLocal = not re.match("(https?|ftp)://", fname.lower())
                if isLocal or includeRemote:
                    # collect the match; otherwise the result is always empty
                    l.append(fname)
        return l

    def strip(self, txt):
        "Return TXT with everything matching one of self.regexps removed."
        stripped = txt
        for pattern in self.regexps:
            stripped = re.compile(pattern).sub("", stripped)
        return stripped

    def escapeImages(self, string):
        """Percent-escape, as UTF-8, the local filenames matched in STRING.

        The pattern used is ``self.regexps[1]``.  On Windows the string is
        returned untouched, and references starting with http://, https://
        or ftp:// are never escaped.
        """
        # Feeding webkit unicode can make it miss images, so on linux/osx
        # the paths are percent-escaped as utf8.  On Windows that fixes some
        # images but breaks others, and filenames normalized by dropbox
        # become unreadable once escaped - so nothing is done there.
        if isWin:
            return string

        def escapeTag(m):
            whole, name = m.group(1), m.group(2)
            if re.match("(https?|ftp)://", name):
                return whole
            quoted = urllib.quote(name.encode("utf-8"))
            return whole.replace(name, quoted)

        return re.sub(self.regexps[1], escapeTag, string)

    # Rebuilding DB

    def check(self, local=None):
        """Return (missingFiles, unusedFiles).

        local -- optional list of filenames to examine; when None the media
                 directory is listed instead.

        NOTE(review): this excerpt looks truncated - an ``else`` before
        ``files = local``, the body of the ``isfile`` test and the code that
        fills ``unused`` are not visible.
        """
        mdir = self.dir()
        # generate card q/a and look through all references
        normrefs = {}
        def norm(s):
            # unicode strings are compared in NFD form; others pass through
            if isinstance(s, unicode):
                return unicodedata.normalize('NFD', s)
            return s
        for f in self.allMedia():
            normrefs[norm(f)] = True
        # loop through directory and find unused & missing media
        unused = []
        if local is None:
            files = os.listdir(mdir)
            files = local
        for file in files:
            if not local:
                path = os.path.join(mdir, file)
                if not os.path.isfile(path):
                    # ignore directories
            nfile = norm(file)
            if nfile not in normrefs:
                del normrefs[nfile]
        # whatever is still in normrefs is reported as missing
        nohave = normrefs.keys()
        return (nohave, unused)

    def allMedia(self):
        "Return a set of all referenced filenames."
        files = set()
        for mid, flds in self.col.db.execute("select mid, flds from notes"):
            # gather every reference filesInStr finds in this note's fields
            files.update(self.filesInStr(mid, flds))
        return files

    # Copying on import
    # FIXME: check if the files are actually identical, and rewrite references
    # if necessary

    def copyTo(self, rdir):
        """Copy media to RDIR. Return number of files copied.

        Entries that already exist in RDIR are left alone and are not
        counted.  Returns 0 when the media directory does not exist.
        """
        ldir = self.dir()
        if not os.path.exists(ldir):
            return 0
        cnt = 0
        for f in os.listdir(ldir):
            src = os.path.join(ldir, f)
            dst = os.path.join(rdir, f)
            if not os.path.exists(dst):
                shutil.copy2(src, dst)
                # count only real copies, as the docstring promises
                cnt += 1
        return cnt

    # Media syncing - changes and removal

    def hasChanged(self):
        "Truthy when the log table holds at least one row."
        pending = self.db.scalar("select 1 from log limit 1")
        return pending

    def removed(self):
        "Return what self.db.list yields for log rows of type MEDIA_REM."
        query = "select * from log where type = ?"
        return self.db.list(query, MEDIA_REM)

    def syncRemove(self, fnames):
        """Delete the files named in FNAMES and forget them in the media DB.

        Each name is removed from disk if present and from both the log and
        media tables.  Afterwards every logged local deletion (type
        MEDIA_REM) is cleared as well.
        """
        # remove provided deletions
        for f in fnames:
            if os.path.exists(f):
                # the actual deletion the existence test guards
                os.unlink(f)
            self.db.execute("delete from log where fname = ?", f)
            self.db.execute("delete from media where fname = ?", f)
        # and all locally-logged deletions, as server has acked them
        self.db.execute("delete from log where type = ?", MEDIA_REM)

    # Media syncing - unbundling zip files from server

    def syncAdd(self, zipData):
        """Extract zip data; true if finished.

        The archive carries a "_meta" JSON member used to look up the name
        each member is stored under, a "_usn" member holding the next usn,
        and optionally a "_finished" marker.

        NOTE(review): this excerpt looks truncated - several ``if``/``elif``
        bodies, an ``else`` branch and the start of the final DB call are
        not visible.
        """
        f = StringIO(zipData)
        z = zipfile.ZipFile(f, "r")
        finished = False
        meta = None
        media = []
        sizecnt = 0
        # get meta info first
        assert z.getinfo("_meta").file_size < 100000
        meta = simplejson.loads(z.read("_meta"))
        nextUsn = int(z.read("_usn"))
        # then loop through all files
        for i in z.infolist():
            # check for zip bombs
            # (running total of uncompressed sizes, capped at 100MB)
            sizecnt += i.file_size
            assert sizecnt < 100*1024*1024
            if i.filename == "_meta" or i.filename == "_usn":
                # ignore previously-retrieved meta
            elif i.filename == "_finished":
                # last zip in set
                finished = True
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # can we store the file on this system?
                if self.illegal(i.filename):
                # save file
                open(name, "wb").write(data)
                # update db
                media.append((name, csum, self._mtime(name)))
                # remove entries from local log
                self.db.execute("delete from log where fname = ?", name)
        # update media db and note new starting usn
        if media:
                "insert or replace into media values (?,?,?)", media)
        self.setUsn(nextUsn) # commits
        # if we have finished adding, we need to record the new folder mtime
        # so that we don't trigger a needless scan
        if finished:
        return finished

    def illegal(self, f):
        """Return True if filename F contains a character this OS rejects.

        Windows and Mac each have their own forbidden set; on any other
        platform every name is accepted.  Always returns a bool rather than
        falling off the end with None.
        """
        if isWin:
            forbidden = "<>:\"/\\|?*^"
        elif isMac:
            forbidden = ":\\/"
        else:
            return False
        return any(c in forbidden for c in f)

    # Media syncing - bundling zip files to send to server
    # Because there's no standard filename encoding for zips, and because not
    # all zip clients support retrieving mtime, we store the files as ascii
    # and place a json file in the zip with the necessary information.

    def zipAdded(self):
        """Add files to a zip until over SYNC_ZIP_SIZE. Return zip data.

        Files are stored under the string form of a running counter, and a
        "_meta" JSON member maps those counters back to the real filenames.
        The return value is the pair (zip bytes, fnames).

        NOTE(review): this excerpt looks truncated - the loop has no visible
        exit, the size-limit ``if`` has no body and nothing visible fills
        ``fnames``.
        """
        f = StringIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)
        sz = 0
        cnt = 0
        files = {}
        cur = self.db.execute(
            "select fname from log where type = ?", MEDIA_ADD)
        fnames = []
        while 1:
            fname = cur.fetchone()
            if not fname:
                # add a flag so the server knows it can clean up
                z.writestr("_finished", "")
            fname = fname[0]
            z.write(fname, str(cnt))
            files[str(cnt)] = fname
            # track the total on-disk size of what has been added so far
            sz += os.path.getsize(fname)
            if sz > SYNC_ZIP_SIZE:
            cnt += 1
        z.writestr("_meta", simplejson.dumps(files))
        return f.getvalue(), fnames

    def forgetAdded(self, fnames):
        """Delete the log rows for FNAMES; do nothing when FNAMES is empty.

        fnames -- sequence of parameter rows handed to executemany as is
        """
        if not fnames:
            # nothing to forget, so skip the database round trip
            return
        self.db.executemany("delete from log where fname = ?", fnames)

    # Tracking changes (private)

    def _initDB(self):
create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int, usn int); insert into meta values (0, 0);
create table log (fname text primary key, type int);

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        "Return the checksum of the contents of the file at PATH."
        # read through a context manager so the handle is closed right away
        with open(path, "rb") as f:
            return checksum(f.read())

    def usn(self):
        "Return the usn value stored in the meta table."
        stored = self.db.scalar("select usn from meta")
        return stored

    def setUsn(self, usn):
        "Store USN in the usn column of the meta table."
        statement = "update meta set usn = ?"
        self.db.execute(statement, usn)

    def syncMod(self):
        "Record the media folder's current mtime as dirMod in the meta table."
        folderMtime = self._mtime(self.dir())
        self.db.execute("update meta set dirMod = ?", folderMtime)

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if mod and mod == mtime:
            return False
        return mtime

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes."
        if self._changed():
            # the folder mtime moved, so rescan and log the differences
            self._logChanges()

    def _logChanges(self):
        """Write the differences reported by _changes() to the media DB.

        Added files become (name, checksum, mtime) media rows plus a
        MEDIA_ADD log row; removed files get a MEDIA_REM log row.

        NOTE(review): this excerpt looks truncated - both executemany calls
        on the media table are unterminated and nothing visible fills
        ``mediaRem``.
        """
        (added, removed) = self._changes()
        log = []
        media = []
        mediaRem = []
        for f in added:
            mt = self._mtime(f)
            media.append((f, self._checksum(f), mt))
            log.append((f, MEDIA_ADD))
        for f in removed:
            log.append((f, MEDIA_REM))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?)",
        if mediaRem:
            self.db.executemany("delete from media where fname = ?",
        # remember the folder mtime that this scan corresponds to
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
        # and logs
        self.db.executemany("insert or replace into log values (?,?)", log)

    def _changes(self):
        """Compare the media table with the media folder.

        Returns the pair (added, removed).  ``self.cache`` maps each known
        filename to [checksum, mtime, seen-on-disk flag].

        NOTE(review): this excerpt looks truncated - the statements that
        append to ``added`` and ``removed``, an ``else`` branch and the body
        of the folder test are not visible.
        """
        self.cache = {}
        for (name, csum, mod) in self.db.execute(
            "select * from media"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            # ignore folders
            if os.path.isdir(f):
            # newly added?
            if f not in self.cache:
                # modified since last time?
                if self._mtime(f) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(f) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in self.cache.items():
            if not v[2]:
        return added, removed

    def sanityCheck(self):
        "Assert that the log table is empty, then return the media row count."
        pending = self.db.scalar("select count() from log")
        assert not pending
        return self.db.scalar("select count() from media")
Пример #3
def _addSchema(db: DB, setColConf: bool = True) -> None:
    """Create the col, notes, cards, revlog and graves tables if missing.

    The SQL template is filled with ``SCHEMA_VERSION`` ("v") and
    ``intTime(1000)`` ("s").  When ``setColConf`` is true, the values from
    ``_getColVars(db)`` are then handed to ``_addColVars``.

    NOTE(review): this excerpt looks truncated - the call that opens the
    SQL string, the closing parenthesis of every ``create table`` and the
    values of the final ``insert`` are not visible.
    """
create table if not exists col (
    id              integer primary key,
    crt             integer not null,
    mod             integer not null,
    scm             integer not null,
    ver             integer not null,
    dty             integer not null,
    usn             integer not null,
    ls              integer not null,
    conf            text not null,
    models          text not null,
    decks           text not null,
    dconf           text not null,
    tags            text not null

create table if not exists notes (
    id              integer primary key,   /* 0 */
    guid            text not null,         /* 1 */
    mid             integer not null,      /* 2 */
    mod             integer not null,      /* 3 */
    usn             integer not null,      /* 4 */
    tags            text not null,         /* 5 */
    flds            text not null,         /* 6 */
    sfld            integer not null,      /* 7 */
    csum            integer not null,      /* 8 */
    flags           integer not null,      /* 9 */
    data            text not null          /* 10 */

create table if not exists cards (
    id              integer primary key,   /* 0 */
    nid             integer not null,      /* 1 */
    did             integer not null,      /* 2 */
    ord             integer not null,      /* 3 */
    mod             integer not null,      /* 4 */
    usn             integer not null,      /* 5 */
    type            integer not null,      /* 6 */
    queue           integer not null,      /* 7 */
    due             integer not null,      /* 8 */
    ivl             integer not null,      /* 9 */
    factor          integer not null,      /* 10 */
    reps            integer not null,      /* 11 */
    lapses          integer not null,      /* 12 */
    left            integer not null,      /* 13 */
    odue            integer not null,      /* 14 */
    odid            integer not null,      /* 15 */
    flags           integer not null,      /* 16 */
    data            text not null          /* 17 */

create table if not exists revlog (
    id              integer primary key,
    cid             integer not null,
    usn             integer not null,
    ease            integer not null,
    ivl             integer not null,
    lastIvl         integer not null,
    factor          integer not null,
    time            integer not null,
    type            integer not null

create table if not exists graves (
    usn             integer not null,
    oid             integer not null,
    type            integer not null

insert or ignore into col
""" % ({
        "v": SCHEMA_VERSION,
        "s": intTime(1000)
    # optionally fill in the collection variables as well
    if setColConf:
        _addColVars(db, *_getColVars(db))
Пример #4
class MediaManager:
    _dir -- the media directory, or None when server is given to the constructor. The working directory is changed to it during synchronization, and then changed back to the previous directory.
    _oldcwd -- the working directory at the time the media manager is created. The working directory is changed back to this value when the MediaManager is closed. If server is given to the constructor, it is None.


    """Captures the argument foo of [sound:foo]"""
    soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
    # Each pattern captures the argument foo of <img src=foo bar> in the
    # named group "fname", ignoring quotes around foo.
    imgRegexps = [
        # src element quoted case
        r"(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        r"(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    ]
    # every pattern exposes the referenced filename as the group "fname"
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        """Remember the collection and work out the media folder path.

        server -- always false in Anki

        NOTE(review): this excerpt looks truncated - the ``try`` lines that
        the two ``except OSError`` clauses belong to, and the early exit for
        the server case, are not visible.
        """
        self.col = col
        if server:
            self._dir = None
        # media directory
        # (the collection path with a trailing ".anki2" replaced by ".media")
        self._dir = re.sub(r"(?i)\.(anki2)$", ".media", self.col.path)
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        except OSError:
            raise Exception("invalidTempFolder")
        # change database

    def connect(self):
        """Ensure the existence of a database in current format, connected in self.db.

        The database path is the media directory path plus ".db2";
        ``create`` records whether that file was absent beforehand.

        NOTE(review): the bodies of both ``if`` statements are missing from
        this excerpt.
        """
        if self.col.server:
        path = self.dir()+".db2"
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def _initDB(self):
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self):
        """Upgrade database in old format to current format.

        When the old "<media dir>.db" file exists it is attached as ``old``
        and its media, log and meta rows are copied over; a failure is
        logged through ``self.col.log``.  The old file is finally renamed to
        "../collection.media.db.old".

        NOTE(review): this excerpt looks truncated - the ``try``, the calls
        that open the SQL strings and the body of the last ``if`` are not
        visible.
        """
        oldpath = self.dir()+".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception as e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:"+traceback.format_exc())
            self.db.execute("detach old")
            npath = "../collection.media.db.old"
            if os.path.exists(npath):
            os.rename("../collection.media.db", npath)

    def close(self):
        """Close database connection.

        Does nothing if server is truthy.
        Changes directory back to the old working directory.

        NOTE(review): this excerpt looks truncated - the early return, the
        call that closes the connection and the directory change itself are
        not visible.
        """
        if self.col.server:
        self.db = None
        # change cwd back to old location
        if self._oldcwd:
                # may have been deleted

    def _deleteDB(self):
        """Delete connected DB, connect to a new one

        NOTE(review): only the path lookup is visible in this excerpt; the
        statements that delete the file and reconnect appear to be missing.
        """
        path = self.db._path

    def dir(self):
        """Return the path of the media directory."""
        mediaDir = self._dir
        return mediaDir

    def _isFAT32(self):
        """Return True when the media folder's volume reports a FAT file system.

        The volume is looked up from the first three characters of the media
        directory path through the win32 API.

        NOTE(review): this excerpt looks truncated - the body of the
        ``isWin`` test and the ``try``/``except`` around the volume lookup
        are not visible.
        """
        if not isWin:
        # pylint: disable=import-error
        import win32api, win32file
            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
            # mapped & unmapped network drive; pray that it's not vfat
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True

    # Adding media
    # opath must be in unicode

    def addFile(self, opath):
        """Copy the file at path opath to collection.media.

        The name may be changed to ensure uniqueness.  Returns whatever
        ``writeData`` returns for the file's contents.
        """
        # the docstring must be closed, otherwise it swallows the body
        with open(opath, "rb") as f:
            return self.writeData(opath, f.read())

    def writeData(self, opath, data, typeHint=None):
        """Write data into the media dir under the file name of opath.

        Only the file name of opath is kept.
        If the name has no extension and typeHint is "image/jpeg" or
        "image/png", the matching extension is added.
        If a file of that name already holds the same data it is reused;
        otherwise a " (N)" suffix is added or incremented until a free name
        is found.

        Returns the name actually used, in NFC form.
        """
        # if fname is a full path, use only the basename
        fname = os.path.basename(opath)

        # if it's missing an extension and a type hint was provided, use that
        if not os.path.splitext(fname)[1] and typeHint:
            # mimetypes is returning '.jpe' even after calling .init(), so we'll do
            # it manually instead
            typeMap = {
                "image/jpeg": ".jpg",
                "image/png": ".png",
            }
            if typeHint in typeMap:
                fname += typeMap[typeHint]

        # make sure we write it in NFC form (pre-APFS Macs will autoconvert to NFD),
        # and return an NFC-encoded reference
        fname = unicodedata.normalize("NFC", fname)
        # ensure it's a valid filename
        base = self.cleanFilename(fname)
        (root, ext) = os.path.splitext(base)
        reg = r" \((\d+)\)$"

        def repl(match):
            return " (%d)" % (int(match.group(1)) + 1)

        # find the first available name
        csum = checksum(data)
        while True:
            fname = root + ext
            path = os.path.join(self.dir(), fname)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                with open(path, "wb") as f:
                    f.write(data)
                return fname
            # if it's identical, reuse
            with open(path, "rb") as f:
                if checksum(f.read()) == csum:
                    return fname
            # otherwise, increment the index in the filename; the branches
            # are alternatives, so a fresh " (1)" is not bumped to " (2)"
            if re.search(reg, root):
                root = re.sub(reg, repl, root)
            else:
                root = root + " (1)"

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        """Return the list of media paths referenced in the string.

        Media names starting with _ are treated like any other media.

        For cloze models, every cloze is expanded in every possible way, so
        several strings may be scanned for one field.

        Concerning the part of the string related to LaTeX, media are
        generated as explained in latex._imgLink's docstring.

        Keyword arguments:
        mid -- the id of the model of the note whose string is considered
        string -- a string, which corresponds to a field of a note
        includeRemote -- whether the list should include references
                         starting with http, https or ftp
        """
        l = []
        model = self.col.models.get(mid)
        if model['type'] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
        else:
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
                        # collect the match; otherwise the result stays empty
                        l.append(fname)
        return l

    def _expandClozes(self, string):
        """Return the list of all strings obtained by expanding the clozes.

        For each cloze number n, there is a string with cloze n replaced by
        [...] or by [hint], and every other cloze replaced by its text.

        The last entry is the text where every cloze is replaced by its
        value, i.e. the answer.
        """
        # the set of cloze numbers occurring in the string
        ords = set(re.findall(r"{{c(\d+)::.+?}}", string))
        strings = []
        from anki.template.template import clozeReg

        def qrepl(m):
            """The text by which the cloze m must be replaced in the question."""
            # NOTE(review): assumes group 4 of clozeReg is the hint - the
            # regex is defined elsewhere, confirm against its definition.
            if m.group(4):
                return "[%s]" % m.group(4)
            return "[...]"

        def arepl(m):
            """The text by which the cloze m must be replaced in the answer."""
            return m.group(2)

        for ord in ords:
            # replace the cloze number ord by the deletion
            s = re.sub(clozeReg%ord, qrepl, string)
            # replace every other cloze by its content
            s = re.sub(clozeReg%".+?", "\\2", s)
            # keep the per-cloze question, as the docstring promises
            strings.append(s)
        strings.append(re.sub(clozeReg%".+?", arepl, string))
        return strings

    def transformNames(self, txt, func):
        """Return txt with every match of self.regexps replaced via func."""
        result = txt
        for pattern in self.regexps:
            result = re.compile(pattern).sub(func, result)
        return result

    def strip(self, txt):
        """Return txt with every match of self.regexps removed."""
        stripped = txt
        for pattern in self.regexps:
            stripped = re.compile(pattern).sub("", stripped)
        return stripped

    def escapeImages(self, string, unescape=False):
        """Percent-escape the local image file names in string.

        With unescape=True the reverse is done: %xx escapes in local image
        file names are decoded.  References starting with http://, https://
        or ftp:// are left untouched in both directions.
        """
        # pick the direction; without the else branch `fn` would be unbound
        # when escaping and would be quote() when unescaping
        if unescape:
            fn = urllib.parse.unquote
        else:
            fn = urllib.parse.quote

        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            return tag.replace(fname, fn(fname))

        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(self, local=None):
        """Return (missingFiles, unusedFiles, warnings).

        local -- optional list of file names to examine; when None the media
                 directory is listed instead.

        NOTE(review): this excerpt looks truncated - among other things the
        code filling ``allRefs`` and ``unused``, several ``else``/``continue``
        lines, the ``try`` matching ``except DBError`` and the openers of the
        ``warnings.append(`` calls are not visible.
        """
        mdir = self.dir()
        # gather all media references in NFC form
        allRefs = set()
        for nid, mid, flds in self.col.db.execute("select id, mid, flds from notes"):
            noteRefs = self.filesInStr(mid, flds)
            # check the refs are in NFC
            for f in noteRefs:
                # if they're not, we'll need to fix them first
                if f != unicodedata.normalize("NFC", f):
                    noteRefs = self.filesInStr(mid, flds)
        # loop through media folder
        unused = []
        if local is None:
            files = os.listdir(mdir)
            files = local
        renamedFiles = False
        dirFound = False
        warnings = []
        for file in files:
            if not local:
                if not os.path.isfile(file):
                    # ignore directories
                    dirFound = True
            if file.startswith("_"):
                # leading _ says to ignore file

            if self.hasIllegal(file):
                # round-trip through the file system encoding so the name
                # can be shown even if it has unencodable characters
                name = file.encode(sys.getfilesystemencoding(), errors="replace")
                name = str(name, sys.getfilesystemencoding())
                    _("Invalid file name, please rename: %s") % name)

            nfcFile = unicodedata.normalize("NFC", file)
            # we enforce NFC fs encoding on non-macs
            if not isMac and not local:
                if file != nfcFile:
                    # delete if we already have the NFC form, otherwise rename
                    if os.path.exists(nfcFile):
                        renamedFiles = True
                        os.rename(file, nfcFile)
                        renamedFiles = True
                    file = nfcFile
            # compare
            if nfcFile not in allRefs:
        # if we renamed any files to nfc format, we must rerun the check
        # to make sure the renamed files are not marked as unused
        if renamedFiles:
            return self.check(local=local)
        # references starting with "_" are never reported as missing
        nohave = [x for x in allRefs if not x.startswith("_")]
        # make sure the media DB is valid
        except DBError:

        if dirFound:
                _("Anki does not support files in subfolders of the collection.media folder."))
        return (nohave, unused, warnings)

    def _normalizeNoteRefs(self, nid):
        note = self.col.getNote(nid)
        for c, fld in enumerate(note.fields):
            nfc = unicodedata.normalize("NFC", fld)
            if nfc != fld:
                note.fields[c] = nfc

    # Copying on import

    def have(self, fname):
        """Whether a file named fname exists in the media directory"""
        candidate = os.path.join(self.dir(), fname)
        return os.path.exists(candidate)

    # Illegal characters and paths

    _illegalCharReg = re.compile(r'[][><:"/?*^\\|\0\r\n]')

    def stripIllegal(self, str):
        """Return str with every character matching _illegalCharReg removed."""
        cleaned = self._illegalCharReg.sub("", str)
        return cleaned

    def hasIllegal(self, str):
        """Whether str contains an illegal character.

        Either according to _illegalCharReg, or because it can't be encoded
        in the file system encoding.
        """
        if re.search(self._illegalCharReg, str):
            return True
        # the except clause needs this attempt: a name the file system
        # encoding cannot represent counts as illegal too
        try:
            str.encode(sys.getfilesystemencoding())
        except UnicodeEncodeError:
            return True
        return False

    def cleanFilename(self, fname):
        """Return fname made safe to use as a media file name.

        Runs stripIllegal, _cleanWin32Filename and _cleanLongFilename in
        that order; if nothing is left, "renamed" is returned instead.
        """
        cleaned = self._cleanLongFilename(
            self._cleanWin32Filename(self.stripIllegal(fname)))
        return cleaned if cleaned else "renamed"

    def _cleanWin32Filename(self, fname):
        """On Windows, prefix reserved names with "renamed"; else no change."""
        # deal with things like con/prn/etc
        if isWin and pathlib.WindowsPath(fname).is_reserved():
            fname = "renamed" + fname
            assert not pathlib.WindowsPath(fname).is_reserved()
        return fname

    def _cleanLongFilename(self, fname):
        """Return fname shortened so that name and full path stay in limits.

        The stem is truncated and the extension kept.  The name limit is 136
        characters, reduced further when the absolute directory of fname
        leaves less room under the path limit (240 on Windows, 1024
        elsewhere).
        """
        # a fairly safe limit that should work on typical windows
        # paths and on eCryptfs partitions, even with a duplicate
        # suffix appended
        namemax = 136

        # the two limits are alternatives: without the else branch pathmax
        # was unbound off Windows and always 1024 on Windows
        pathmax = 240 if isWin else 1024

        # cap namemax based on absolute path
        dirlen = len(os.path.dirname(os.path.abspath(fname)))
        remaining = pathmax - dirlen
        namemax = min(remaining, namemax)
        assert namemax > 0

        if len(fname) > namemax:
            head, ext = os.path.splitext(fname)
            headmax = namemax - len(ext)
            head = head[0:headmax]
            fname = head + ext
            assert(len(fname) <= namemax)

        return fname

    # Tracking changes

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes in the db."
        if self._changed():
            # the folder mtime moved, so rescan and record the differences
            self._logChanges()

    def haveDirty(self):
        """Truthy when the media table has at least one row with dirty=1."""
        query = "select 1 from media where dirty=1 limit 1"
        return self.db.scalar(query)

    def _mtime(self, path):
        """Time of most recent content modification of file at path.

        Expressed in seconds."""
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        """Return the checksum of the contents of the file at path."""
        with open(path, "rb") as handle:
            contents = handle.read()
            return checksum(contents)

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def _logChanges(self):
        (added, removed) = self._changes()
        media = []
        for f, mtime in added:
            media.append((f, self._checksum(f), mtime, 1))
        for f in removed:
            media.append((f, None, 0, 1))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?,?)",
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changes(self):
        """Compare the media table with the media folder.

        Returns the pair (added, removed); entries of ``added`` are
        (NFC name, mtime) tuples.  ``self.cache`` maps each known NFC name
        to [checksum, mtime, seen-on-disk flag].

        NOTE(review): this excerpt looks truncated - the bodies of most of
        the skip tests, an ``else`` branch and the statement filling
        ``removed`` are not visible.
        """
        self.cache = {}
        for (name, csum, mod) in self.db.execute(
            "select fname, csum, mtime from media where csum is not null"):
            # previous entries may not have been in NFC form
            normname = unicodedata.normalize("NFC", name)
            self.cache[normname] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        with os.scandir(self.dir()) as it:
            for f in it:
                # ignore folders and thumbs.db
                if f.is_dir():
                if f.name.lower() == "thumbs.db":
                # and files with invalid chars
                if self.hasIllegal(f.name):
                # empty files are invalid; clean them up and continue
                sz = f.stat().st_size
                if not sz:
                if sz > 100*1024*1024:
                    self.col.log("ignoring file over 100MB", f.name)
                # check encoding
                normname = unicodedata.normalize("NFC", f.name)
                if not isMac:
                    if f.name != normname:
                        # wrong filename encoding which will cause sync errors
                        if os.path.exists(normname):
                            os.rename(f.name, normname)
                    # on Macs we can access the file using any normalization

                # newly added?
                mtime = int(f.stat().st_mtime)
                if normname not in self.cache:
                    added.append((normname, mtime))
                    # modified since last time?
                    if mtime != self.cache[normname][1]:
                        # and has different checksum?
                        if self._checksum(normname) != self.cache[normname][0]:
                            added.append((normname, mtime))
                    # mark as used
                    self.cache[normname][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in list(self.cache.items()):
            if not v[2]:
        return added, removed

    # Syncing-related

    def lastUsn(self):
        """Return the ``lastUsn`` value read from the ``meta`` table."""
        query = "select lastUsn from meta"
        return self.db.scalar(query)

    def setLastUsn(self, usn):
        """Write ``usn`` into the ``lastUsn`` column of the ``meta`` table."""
        statement = "update meta set lastUsn = ?"
        self.db.execute(statement, usn)

    def syncInfo(self, fname):
        """Return the (checksum, dirty flag) row stored for ``fname``.

        Falls back to ``(None, 0)`` when the lookup yields nothing truthy.
        """
        row = self.db.first(
            "select csum, dirty from media where fname=?", fname)
        if row:
            return row
        return (None, 0)

    def markClean(self, fnames):
        # Intended to clear the dirty flag of each media row named in fnames.
        # NOTE(review): the line that opens the call (presumably
        # `self.db.execute(`) is missing in this copy, leaving only its
        # arguments below -- confirm and restore.
        for fname in fnames:
                "update media set dirty=0 where fname=?", fname)

    def syncDelete(self, fname):
        """Delete the row for fname from the media table."""
        # NOTE(review): the `if` below has no body in this copy; presumably it
        # removed the on-disk file when it exists -- confirm and restore.
        if os.path.exists(fname):
        self.db.execute("delete from media where fname=?", fname)

    def mediaCount(self):
        """Return how many media rows have a non-null checksum."""
        query = "select count() from media where csum is not null"
        return self.db.scalar(query)

    def dirtyCount(self):
        """Return how many media rows are flagged dirty.

        The query does not filter on the checksum column, so every dirty row
        is counted.
        """
        query = "select count() from media where dirty=1"
        return self.db.scalar(query)

    def forceResync(self):
        """Empty the media table and zero ``lastUsn`` and ``dirMod`` in meta."""
        for statement in ("delete from media",
                          "update meta set lastUsn=0,dirMod=0"):
            self.db.execute(statement)

    # Media syncing: zips

    def mediaChangesZip(self):
        # Build an in-memory zip of dirty media plus a "_meta" JSON entry and
        # return (zip bytes, fnames).
        # NOTE(review): the descriptive text below has lost its docstring
        # quotes, and the `else:` / loop-exit lines further down are missing in
        # this copy, so the method does not parse as written.
        The pair with:
        * A string encoding a zip files with:
        ** media to upload
        ** _meta: a json list associating to each name (as in zip) to
        the real name of the file
        * list of media considered
        f = io.BytesIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)

        # NOTE(review): nothing visible appends to fnames, yet it is returned
        fnames = []
        # meta is list of (fname, zipname), where zipname of None
        # is a deleted file
        meta = []
        sz = 0#sum of the size of the media.

        # loop over dirty medias. At most SYNC_ZIP_COUNT = 25 elements
        for c, (fname, csum) in enumerate(self.db.execute(
                        "select fname, csum from media where dirty=1"
                        " limit %d"%SYNC_ZIP_COUNT)):

            normname = unicodedata.normalize("NFC", fname)

            # a truthy checksum means the file is stored in the zip under the
            # name str(c)
            if csum:
                self.col.log("+media zip", fname)
                z.write(fname, str(c))
                meta.append((normname, str(c)))
                sz += os.path.getsize(fname)
                # NOTE(review): the two lines below look like the branch for a
                # falsy csum (empty zip name); the `else:` is missing here.
                self.col.log("-media zip", fname)
                meta.append((normname, ""))

            if sz >= SYNC_ZIP_SIZE:

        z.writestr("_meta", json.dumps(meta))
        return f.getvalue(), fnames

    def addFilesFromZip(self, zipData):
        # Read the zip in zipData, write its entries out under the names given
        # by "_meta", queue rows for the media table and return the count.
        # NOTE(review): the descriptive text below has lost its docstring
        # quotes and several statements are missing in this copy, so the method
        # does not parse as written.
        Copy each file from zipData (except _meta) to the media
        folder, and add those files to the media database. Rename the
        file according to _meta.

        zipData -- A byte tream containing a zipfile, containing:
        * _meta, a file containing a json dict associtaing to each name of file in zip (except meta) a name to be used in the media folder
        * arbitrary fields to save in the media folder
        f = io.BytesIO(zipData)
        z = zipfile.ZipFile(f, "r")
        media = []
        # get meta info first
        meta = json.loads(z.read("_meta").decode("utf8"))
        # then loop through all files
        cnt = 0
        for i in z.infolist():
            if i.filename == "_meta":
                # ignore previously-retrieved meta
                # NOTE(review): as indented here the lines below run only for
                # the "_meta" entry; a `continue` / `else:` seems to be missing.
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # normalize name
                name = unicodedata.normalize("NFC", name)
                # save file
                # NOTE(review): the `with` below has no body (the write is missing)
                with open(name, "wb") as f:
                # update db
                # rows are stored with dirty=0
                media.append((name, csum, self._mtime(name), 0))
                cnt += 1
        if media:
                "insert or replace into media values (?,?,?,?)", media)
        return cnt
class MediaManager(object):

    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        "(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        "(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        # Remember the collection and derive the media directory path from
        # col.path by replacing a trailing ".anki2" with ".media".
        # NOTE(review): the `try:` lines, the body of the os.path.exists check
        # and whatever followed "change database" are missing in this copy, so
        # the method does not parse as written.
        self.col = col
        if server:
            self._dir = None
        # media directory
        # NOTE(review): as written this overwrites the None assigned above even
        # when server is truthy; an early return may have been lost -- confirm.
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        # convert dir to unicode if it's not already
        if isinstance(self._dir, str):
            self._dir = str(self._dir)
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        except OSError:
            raise Exception("invalidTempFolder")
        # change database

    def connect(self):
        # Open the media database stored next to the media dir as "<dir>.db2".
        # NOTE(review): both `if` statements below have no body in this copy.
        if self.col.server:
        path = self.dir() + ".db2"
        # remember whether the file existed before DB() was called
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def _initDB(self):
        # NOTE(review): only SQL text remains of this method; the statement
        # that executes it and the string delimiters are missing in this copy.
        # The SQL defines the media and meta tables and an index on media.dirty.
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self):
        # When an old "<dir>.db" exists, attach it as `old`, copy its contents
        # into the current database, then rename it to
        # "../collection.media.db.old".
        # NOTE(review): the `try:`, the calls wrapping the SQL text and several
        # other lines are missing in this copy, so the method does not parse.
        oldpath = self.dir() + ".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception as e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:" +
            self.db.execute("detach old")
            npath = "../collection.media.db.old"
            # NOTE(review): this `if` has no body in this copy
            if os.path.exists(npath):
            os.rename("../collection.media.db", npath)

    def close(self):
        # Drop the database reference held in self.db.
        # NOTE(review): the bodies of both `if` statements are missing in this
        # copy, so the method does not parse as written.
        if self.col.server:
        self.db = None
        # change cwd back to old location
        if self._oldcwd:
                # may have been deleted

    def dir(self):
        """Return the media directory path held in ``self._dir``."""
        media_dir = self._dir
        return media_dir

    def _isFAT32(self):
        # Return True when the filesystem name reported by
        # win32api.GetVolumeInformation for the media dir's volume starts
        # with "fat".
        # NOTE(review): the body of `if not isWin:` and the try/except around
        # the volume lookup are missing in this copy.
        if not isWin:
        import win32api, win32file
            # the first three characters of the path are used as the mount point
            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
            # mapped & unmapped network drive; pray that it's not vfat
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True

    # Adding media
    # opath must be in unicode

    def addFile(self, opath):
        """Read the file at ``opath`` and store it through ``writeData``.

        opath -- path of the source file; it is read in binary mode.

        Returns whatever ``writeData`` returns for the file's contents.
        """
        # the context manager closes the source handle as soon as it is read,
        # instead of leaving that to the garbage collector
        with open(opath, "rb") as src:
            data = src.read()
        return self.writeData(opath, data)

    def writeData(self, opath, data):
        """Write ``data`` into the media folder under a non-clashing name.

        opath -- path or file name; only its basename is used.
        data -- the bytes to store.

        Returns the file name used. An existing file whose checksum equals
        that of ``data`` is reused; otherwise " (1)", " (2)", ... is appended
        to the name's root until a free or identical name is found.
        """
        # if fname is a full path, use only the basename
        fname = os.path.basename(opath)
        # make sure we write it in NFC form (on mac will autoconvert to NFD),
        # and return an NFC-encoded reference
        fname = unicodedata.normalize("NFC", fname)
        # remove any dangerous characters
        base = self.stripIllegal(fname)
        (root, ext) = os.path.splitext(base)

        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n + 1)

        # trailing " (N)" counter on the root; raw string avoids invalid escapes
        reg = r" \((\d+)\)$"
        # find the first available name
        csum = checksum(data)
        while True:
            fname = root + ext
            path = os.path.join(self.dir(), fname)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                with open(path, "wb") as out:
                    out.write(data)
                return fname
            # if it's identical, reuse
            with open(path, "rb") as existing:
                if checksum(existing.read()) == csum:
                    return fname
            # otherwise, increment the index in the filename; the first clash
            # must yield " (1)" and only later clashes bump the counter
            if not re.search(reg, root):
                root = root + " (1)"
            else:
                root = re.sub(reg, repl, root)

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        # Scan `string` with every pattern in self.regexps and return the list
        # `l`. A matched name is local when it does not start with http(s)://
        # or ftp://.
        # NOTE(review): the `else:` before `strings = [string]` and the body of
        # the final `if` are missing in this copy, so nothing visible is ever
        # added to `l` and the method does not parse as written.
        from anki.latex import mungeQA
        l = []
        model = self.col.models.get(mid)
        strings = []
        if model['type'] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
        return l

    def _expandClozes(self, string):
        """Return every rendering of ``string`` produced by its clozes.

        For each cloze number found in ``string`` one variant is built in
        which that cloze is replaced by "[hint]" (or "[...]" when it has no
        hint) and all other clozes by their text. A final variant replaces
        every cloze by its text.
        """
        from anki.template.template import clozeReg

        ords = set(re.findall(r"{{c(\d+)::.+?}}", string))
        strings = []

        def qrepl(m):
            # group 3 is tested for a hint; fall back to a placeholder without one
            if m.group(3):
                return "[%s]" % m.group(3)
            return "[...]"

        def arepl(m):
            return m.group(1)

        for cloze_ord in ords:
            s = re.sub(clozeReg % cloze_ord, qrepl, string)
            s = re.sub(clozeReg % ".+?", "\\1", s)
            # keep the per-cloze variant instead of discarding it
            strings.append(s)
        strings.append(re.sub(clozeReg % ".+?", arepl, string))
        return strings

    def transformNames(self, txt, func):
        """Apply ``func`` as the substitution for each pattern in ``self.regexps``.

        The patterns are applied in order, each to the output of the previous
        one, and the final text is returned.
        """
        result = txt
        for pattern in self.regexps:
            result = re.compile(pattern).sub(func, result)
        return result

    def strip(self, txt):
        """Return ``txt`` with every match of each ``self.regexps`` pattern removed."""
        cleaned = txt
        for pattern in self.regexps:
            cleaned = re.compile(pattern).sub("", cleaned)
        return cleaned

    def escapeImages(self, string, unescape=False):
        """Percent-encode (or decode) local file names inside <img> tags.

        string -- text to scan with the patterns in ``self.imgRegexps``.
        unescape -- when true, names are decoded with ``urllib.parse.unquote``
            instead of encoded with ``urllib.parse.quote``.

        Names starting with http://, https:// or ftp:// are left untouched.
        Returns the rewritten text.
        """
        # pick the direction once; previously `unescape` was ignored
        fn = urllib.parse.unquote if unescape else urllib.parse.quote

        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            # quote/unquote take and return str, using UTF-8 by default
            return tag.replace(fname, fn(fname))

        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(self, local=None):
        "Return (missingFiles, unusedFiles, invalidFiles)."
        # NOTE(review): many lines are missing in this copy (bodies of several
        # `if` statements, `else:` branches, and whatever fills allRefs, unused
        # and invalid), so the method does not parse as written.
        mdir = self.dir()
        # gather all media references in NFC form
        allRefs = set()
        for nid, mid, flds in self.col.db.execute(
                "select id, mid, flds from notes"):
            noteRefs = self.filesInStr(mid, flds)
            # check the refs are in NFC
            for f in noteRefs:
                # if they're not, we'll need to fix them first
                if f != unicodedata.normalize("NFC", f):
                    noteRefs = self.filesInStr(mid, flds)
        # loop through media folder
        unused = []
        invalid = []
        # local, when given, replaces the directory listing
        if local is None:
            files = os.listdir(mdir)
            files = local
        renamedFiles = False
        for file in files:
            if not local:
                if not os.path.isfile(file):
                    # ignore directories
            if file.startswith("_"):
                # leading _ says to ignore file
            if not isinstance(file, str):
                    str(file, sys.getfilesystemencoding(), "replace"))
            nfcFile = unicodedata.normalize("NFC", file)
            # we enforce NFC fs encoding on non-macs; on macs we'll have gotten
            # NFD so we use the above variable for comparing references
            if not isMac and not local:
                if file != nfcFile:
                    # delete if we already have the NFC form, otherwise rename
                    if os.path.exists(nfcFile):
                        renamedFiles = True
                        os.rename(file, nfcFile)
                        renamedFiles = True
                    file = nfcFile
            # compare
            if nfcFile not in allRefs:
        # if we renamed any files to nfc format, we must rerun the check
        # to make sure the renamed files are not marked as unused
        if renamedFiles:
            return self.check(local=local)
        # references starting with "_" are never reported as missing
        nohave = [x for x in allRefs if not x.startswith("_")]
        return (nohave, unused, invalid)

    def _normalizeNoteRefs(self, nid):
        note = self.col.getNote(nid)
        for c, fld in enumerate(note.fields):
            nfc = unicodedata.normalize("NFC", fld)
            if nfc != fld:
                note.fields[c] = nfc

    # Copying on import

    def have(self, fname):
        """Tell whether ``fname`` exists inside the media directory."""
        candidate = os.path.join(self.dir(), fname)
        return os.path.exists(candidate)

    # Illegal characters

    _illegalCharReg = re.compile(r'[][><:"/?*^\\|\0\r\n]')

    def stripIllegal(self, str):
        """Return ``str`` with every match of ``_illegalCharReg`` removed."""
        pattern = self._illegalCharReg
        return pattern.sub("", str)

    def hasIllegal(self, str):
        """Tell whether ``str`` is unusable as a media file name.

        Returns True for a value that is not a text string and for text
        containing a match of ``_illegalCharReg``; False otherwise.
        """
        # the parameter shadows the builtin `str`, so the type must be reached
        # through the builtins module; isinstance(str, str) would raise here
        import builtins

        # a file that couldn't be decoded to unicode is considered invalid
        if not isinstance(str, builtins.str):
            return True
        return bool(re.search(self._illegalCharReg, str))

    # Tracking changes

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes."
        # NOTE(review): the body of this `if` is missing in this copy;
        # presumably it invoked the change-logging step -- confirm and restore.
        if self._changed():

    def haveDirty(self):
        """Return the database's answer to "is any media row dirty?".

        The value comes straight from ``self.db.scalar``.
        """
        query = "select 1 from media where dirty=1 limit 1"
        return self.db.scalar(query)

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        """Return ``checksum`` of the binary contents of the file at ``path``."""
        # close the handle deterministically instead of leaking it
        with open(path, "rb") as f:
            return checksum(f.read())

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def _logChanges(self):
        # Turn the (added, removed) lists from self._changes() into media rows
        # and store the media directory's current mtime in meta.dirMod.
        (added, removed) = self._changes()
        media = []
        # added names are stored with their mtime, checksum and dirty=1
        for f in added:
            mt = self._mtime(f)
            media.append((f, self._checksum(f), mt, 1))
        # removed names are stored with a null checksum, mtime 0 and dirty=1
        for f in removed:
            media.append((f, None, 0, 1))
        # update media db
        # NOTE(review): the executemany call below is never closed in this copy;
        # its row argument (presumably `media`) seems to be missing -- confirm.
        self.db.executemany("insert or replace into media values (?,?,?,?)",
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changes(self):
        # Compare os.listdir(self.dir()) with the rows of the media table and
        # return the pair (added, removed).
        # NOTE(review): several `if` statements below have no body in this copy
        # and nothing visible appends to `added` or `removed`, so the method
        # does not parse as written.
        # self.cache maps name -> [csum, mtime, seen-on-disk flag]
        self.cache = {}
        for (name, csum, mod) in self.db.execute(
                "select fname, csum, mtime from media where csum is not null"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            # ignore folders and thumbs.db
            if os.path.isdir(f):
            if f.lower() == "thumbs.db":
            # and files with invalid chars
            if self.hasIllegal(f):
            # empty files are invalid; clean them up and continue
            sz = os.path.getsize(f)
            if not sz:
            if sz > 100 * 1024 * 1024:
                self.col.log("ignoring file over 100MB", f)
            # check encoding
            if not isMac:
                normf = unicodedata.normalize("NFC", f)
                if f != normf:
                    # wrong filename encoding which will cause sync errors
                    if os.path.exists(normf):
                        os.rename(f, normf)
            # newly added?
            if f not in self.cache:
                # modified since last time?
                # NOTE(review): as indented here this lookup runs only when f
                # is NOT in self.cache, which would raise KeyError; an `else:`
                # appears to have been lost -- confirm.
                if self._mtime(f) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(f) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in list(self.cache.items()):
            if not v[2]:
        return added, removed

    # Syncing-related

    def lastUsn(self):
        """Return the ``lastUsn`` value read from the ``meta`` table."""
        query = "select lastUsn from meta"
        value = self.db.scalar(query)
        return value

    def setLastUsn(self, usn):
        """Write ``usn`` into the ``lastUsn`` column of the ``meta`` table."""
        sql = "update meta set lastUsn = ?"
        self.db.execute(sql, usn)

    def syncInfo(self, fname):
        # Look up the (csum, dirty) row stored for fname; (None, 0) is the
        # fallback when the lookup result is falsy.
        # NOTE(review): the call below is never closed in this copy; its bound
        # parameter (presumably `fname`) seems to be missing -- confirm.
        ret = self.db.first("select csum, dirty from media where fname=?",
        return ret or (None, 0)

    def markClean(self, fnames):
        """Clear the dirty flag of every media row named in ``fnames``."""
        statement = "update media set dirty=0 where fname=?"
        for name in fnames:
            self.db.execute(statement, name)

    def syncDelete(self, fname):
        # Delete the row for fname from the media table.
        # NOTE(review): the `if` below has no body in this copy; presumably it
        # removed the on-disk file when it exists -- confirm and restore.
        if os.path.exists(fname):
        self.db.execute("delete from media where fname=?", fname)

    def mediaCount(self):
        """Return how many media rows have a non-null checksum."""
        query = "select count() from media where csum is not null"
        return self.db.scalar(query)

    def dirtyCount(self):
        """Return how many media rows are flagged dirty."""
        query = "select count() from media where dirty=1"
        total = self.db.scalar(query)
        return total

    def forceResync(self):
        """Empty the media table, zero the meta counters, then vacuum/analyze.

        The three statements are executed in this order through ``self.db``.
        """
        statements = (
            "delete from media",
            "update meta set lastUsn=0,dirMod=0",
            "vacuum analyze",
        )
        for statement in statements:
            self.db.execute(statement)

    # Media syncing: zips

    def mediaChangesZip(self):
        # Build an in-memory zip of dirty media plus a "_meta" JSON entry and
        # return (zip data, fnames).
        # NOTE(review): the `else:` for deleted files and the body of the size
        # check are missing in this copy, so the method does not parse.
        # NOTE(review): zipfile writes bytes; on Python 3 a StringIO target
        # looks wrong (io.BytesIO expected) -- confirm.
        from io import StringIO
        f = StringIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)

        # NOTE(review): nothing visible appends to fnames, yet it is returned
        fnames = []
        # meta is list of (fname, zipname), where zipname of None
        # is a deleted file
        meta = []
        sz = 0

        for c, (fname, csum) in enumerate(
                self.db.execute("select fname, csum from media where dirty=1"
                                " limit %d" % SYNC_ZIP_COUNT)):

            normname = unicodedata.normalize("NFC", fname)

            # a truthy checksum means the file is stored in the zip under the
            # name str(c)
            if csum:
                self.col.log("+media zip", fname)
                z.write(fname, str(c))
                meta.append((normname, str(c)))
                sz += os.path.getsize(fname)
                self.col.log("-media zip", fname)
                meta.append((normname, ""))

            if sz >= SYNC_ZIP_SIZE:

        z.writestr("_meta", json.dumps(meta))
        return f.getvalue(), fnames

    def addFilesFromZip(self, zipData):
        "Extract zip data; return the number of files counted in cnt."
        # NOTE(review): the `continue` / `else:` lines and the call head of the
        # final insert are missing in this copy, so the method does not parse.
        # NOTE(review): zipfile reads bytes; on Python 3 a StringIO source
        # looks wrong (io.BytesIO expected) -- confirm.
        from io import StringIO
        f = StringIO(zipData)
        z = zipfile.ZipFile(f, "r")
        media = []
        # get meta info first
        meta = json.loads(z.read("_meta"))
        # then loop through all files
        cnt = 0
        for i in z.infolist():
            if i.filename == "_meta":
                # ignore previously-retrieved meta
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                if not isinstance(name, str):
                    name = str(name, "utf8")
                # normalize name for platform
                # NOTE(review): as written the NFD result is overwritten by the
                # NFC call on the next line; an `else:` seems to be missing.
                if isMac:
                    name = unicodedata.normalize("NFD", name)
                    name = unicodedata.normalize("NFC", name)
                # save file
                open(name, "wb").write(data)
                # update db
                # rows are stored with dirty=0
                media.append((name, csum, self._mtime(name), 0))
                cnt += 1
        if media:
                "insert or replace into media values (?,?,?,?)", media)
        return cnt
Пример #6
class MediaManager(object):

    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        "(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        "(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    regexps = soundRegexps + imgRegexps

    def __init__(self, col):
        # Remember the collection and derive the media directory path from
        # col.path by replacing a trailing ".anki2" with ".media".
        # This variant is Python 2 code (it calls `unicode`).
        # NOTE(review): the `try:` lines, the body of the os.path.exists check
        # and whatever followed "change database" are missing in this copy.
        self.col = col
        # media directory
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        # convert dir to unicode if it's not already
        if isinstance(self._dir, str):
            self._dir = unicode(self._dir, sys.getfilesystemencoding())
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        except OSError:
            raise Exception("invalidTempFolder")
        # change database

    def connect(self):
        # Open the media database stored next to the media dir as "<dir>.db2".
        path = self.dir() + ".db2"
        # remember whether the file existed before DB() was called
        create = not os.path.exists(path)
        self.db = DB(path)
        # NOTE(review): the body of this `if` is missing in this copy
        if create:

    def _initDB(self):
        # NOTE(review): only SQL text remains of this method; the statement
        # that executes it and the string delimiters are missing in this copy.
        # The SQL defines the media and meta tables and an index on media.dirty.
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self):
        # When an old "<dir>.db" exists, attach it as `old`, copy its contents
        # into the current database, then rename it to
        # "../collection.media.db.old".
        # This variant is Python 2 code (`except Exception, e` syntax).
        # NOTE(review): the `try:`, the calls wrapping the SQL text and several
        # other lines are missing in this copy, so the method does not parse.
        oldpath = self.dir() + ".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception, e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:" +
            self.db.execute("detach old")
            npath = "../collection.media.db.old"
            # NOTE(review): this `if` has no body in this copy
            if os.path.exists(npath):
            os.rename("../collection.media.db", npath)
Пример #7
class MediaManager(object):

    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        # unquoted case
        "(?i)(<img[^>]+src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        # Remember the collection and derive the media directory path from
        # col.path by replacing a trailing ".anki2" with ".media".
        # This variant is Python 2 code (it calls `unicode`).
        # NOTE(review): the `try:` line, the body of the os.path.exists check
        # and whatever followed "change database" are missing in this copy.
        self.col = col
        if server:
            self._dir = None
        # media directory
        # NOTE(review): as written this overwrites the None assigned above even
        # when server is truthy; an early return may have been lost -- confirm.
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        # convert dir to unicode if it's not already
        if isinstance(self._dir, str):
            self._dir = unicode(self._dir, sys.getfilesystemencoding())
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        # change database

    def connect(self):
        # Open the media database stored next to the media dir as "<dir>.db".
        # NOTE(review): both `if` statements below have no body in this copy.
        if self.col.server:
        path = self.dir() + ".db"
        # remember whether the file existed before DB() was called
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def close(self):
        # Drop the database reference held in self.db.
        # NOTE(review): the bodies of both `if` statements are missing in this
        # copy, so the method does not parse as written.
        if self.col.server:
        self.db = None
        # change cwd back to old location
        if self._oldcwd:
                # may have been deleted

    def dir(self):
        """Return the media directory path held in ``self._dir``."""
        location = self._dir
        return location

    def _isFAT32(self):
        # Return True when the filesystem name reported by
        # win32api.GetVolumeInformation for the media dir's volume starts
        # with "fat".
        # NOTE(review): the body of `if not isWin:` and the try/except around
        # the volume lookup are missing in this copy.
        if not isWin:
        import win32api, win32file
            # the first three characters of the path are used as the mount point
            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
            # mapped & unmapped network drive; pray that it's not vfat
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True

    # Adding media

    def addFile(self, opath):
        """Read the file at ``opath`` and store it through ``writeData``.

        opath -- path of the source file; it is read in binary mode.

        Returns whatever ``writeData`` returns for the file's contents.
        """
        # close the source handle as soon as it has been read
        with open(opath, "rb") as src:
            data = src.read()
        return self.writeData(opath, data)

    def writeData(self, opath, data):
        """Write ``data`` into the media folder under a non-clashing name.

        opath -- path or file name; only its basename is used.
        data -- the bytes to store.

        Returns the file name used. An existing file whose checksum equals
        that of ``data`` is reused; otherwise " (1)", " (2)", ... is appended
        to the name's root until a free or identical name is found.
        """
        # if fname is a full path, use only the basename
        fname = os.path.basename(opath)
        # remove any dangerous characters
        base = self.stripIllegal(fname)
        (root, ext) = os.path.splitext(base)

        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n + 1)

        # trailing " (N)" counter on the root; raw string avoids invalid escapes
        reg = r" \((\d+)\)$"
        # find the first available name
        csum = checksum(data)
        while True:
            fname = root + ext
            path = os.path.join(self.dir(), fname)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                with open(path, "wb") as out:
                    out.write(data)
                return fname
            # if it's identical, reuse
            with open(path, "rb") as existing:
                if checksum(existing.read()) == csum:
                    return fname
            # otherwise, increment the index in the filename; the first clash
            # must yield " (1)" and only later clashes bump the counter
            if not re.search(reg, root):
                root = root + " (1)"
            else:
                root = re.sub(reg, repl, root)

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        # Scan `string` with every pattern in self.regexps and return the list
        # `l`. A matched name is local when it does not start with http(s)://
        # or ftp://.
        # NOTE(review): the `else:` before `strings = [string]` and the body of
        # the final `if` are missing in this copy, so nothing visible is ever
        # added to `l` and the method does not parse as written.
        l = []
        model = self.col.models.get(mid)
        strings = []
        if model['type'] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
        return l

    def _expandClozes(self, string):
        """Return every rendering of ``string`` produced by its clozes.

        For each cloze number found in ``string`` one variant is built in
        which that cloze is replaced by "[hint]" (or "[...]" when it has no
        hint) and all other clozes by their text. A final variant replaces
        every cloze by its text.
        """
        from anki.template.template import clozeReg

        ords = set(re.findall(r"{{c(\d+)::.+?}}", string))
        strings = []

        def qrepl(m):
            # group 3 is tested for a hint; fall back to a placeholder without one
            if m.group(3):
                return "[%s]" % m.group(3)
            return "[...]"

        def arepl(m):
            return m.group(1)

        for cloze_ord in ords:
            s = re.sub(clozeReg % cloze_ord, qrepl, string)
            s = re.sub(clozeReg % ".+?", "\\1", s)
            # keep the per-cloze variant instead of discarding it
            strings.append(s)
        strings.append(re.sub(clozeReg % ".+?", arepl, string))
        return strings

    def transformNames(self, txt, func):
        """Run ``func`` as the substitution for each pattern in ``self.regexps``.

        Patterns are applied in order, each on the previous result.
        """
        current = txt
        for pattern in self.regexps:
            current = re.compile(pattern).sub(func, current)
        return current

    def strip(self, txt):
        """Return ``txt`` with every match of each ``self.regexps`` pattern removed."""
        remaining = txt
        for pattern in self.regexps:
            remaining = re.compile(pattern).sub("", remaining)
        return remaining

    def escapeImages(self, string):
        """Percent-encode local file names inside <img> tags.

        string -- text to scan with the patterns in ``self.imgRegexps``.

        Names starting with http://, https:// or ftp:// are left untouched.
        Returns the rewritten text.
        """
        # `urllib.quote` exists only on Python 2; resolve the function from
        # whichever location the running interpreter provides
        try:
            from urllib.parse import quote
        except ImportError:
            from urllib import quote

        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            # quoting the UTF-8 bytes works on both Python 2 and Python 3
            return tag.replace(fname, quote(fname.encode("utf-8")))

        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(self, local=None):
        "Return (missingFiles, unusedFiles)."
        # NOTE(review): many lines are missing in this copy (bodies of several
        # `if` statements, `else:` branches, and whatever fills `unused`), so
        # the method does not parse as written.
        mdir = self.dir()
        # generate card q/a and look through all references
        normrefs = {}

        # on Macs unicode names are compared in NFD form; other values pass
        # through unchanged
        def norm(s):
            if isinstance(s, unicode) and isMac:
                return unicodedata.normalize('NFD', s)
            return s

        for f in self.allMedia():
            normrefs[norm(f)] = True
        # loop through directory and find unused & missing media
        unused = []
        # local, when given, replaces the directory listing
        if local is None:
            files = os.listdir(mdir)
            files = local
        for file in files:
            if not local:
                path = os.path.join(mdir, file)
                if not os.path.isfile(path):
                    # ignore directories
                if file.startswith("_"):
                    # leading _ says to ignore file
            nfile = norm(file)
            # NOTE(review): as written this deletes a key that is NOT in
            # normrefs, which would raise KeyError; an `else:` seems lost.
            if nfile not in normrefs:
                del normrefs[nfile]
        # references starting with "_" are never reported as missing
        nohave = [x for x in normrefs.keys() if not x.startswith("_")]
        return (nohave, unused)

    def allMedia(self):
        "Return a set of all referenced filenames."
        files = set()
        # every note's fields are scanned with filesInStr
        for mid, flds in self.col.db.execute("select mid, flds from notes"):
            # NOTE(review): the body of this inner loop (presumably adding f to
            # files) is missing in this copy -- confirm and restore.
            for f in self.filesInStr(mid, flds):
        return files

    # Copying on import

    def have(self, fname):
        """Tell whether ``fname`` exists inside the media directory."""
        target = os.path.join(self.dir(), fname)
        return os.path.exists(target)

    # Media syncing - changes and removal

    def hasChanged(self):
        """Return a truthy value when the ``log`` table holds at least one row.

        The value comes straight from ``self.db.scalar``.
        """
        probe = "select 1 from log limit 1"
        return self.db.scalar(probe)

    def removed(self):
        """Return ``self.db.list`` of the log rows whose type is ``MEDIA_REM``."""
        query = "select * from log where type = ?"
        return self.db.list(query, MEDIA_REM)

    def syncRemove(self, fnames):
        # Drop the log and media rows of every name in fnames, then drop all
        # log rows of type MEDIA_REM.
        # remove provided deletions
        for f in fnames:
            # NOTE(review): this `if` has no body in this copy; presumably it
            # removed the on-disk file when it exists -- confirm and restore.
            if os.path.exists(f):
            self.db.execute("delete from log where fname = ?", f)
            self.db.execute("delete from media where fname = ?", f)
        # and all locally-logged deletions, as server has acked them
        self.db.execute("delete from log where type = ?", MEDIA_REM)

    # Media syncing - unbundling zip files from server

    def syncAdd(self, zipData):
        "Extract zip data; true if finished."
        # NOTE(review): the body of the first `if`, the `else:` before the
        # extraction lines, the row argument of executemany and the body of
        # `if finished:` are missing in this copy, so this does not parse.
        f = StringIO(zipData)
        z = zipfile.ZipFile(f, "r")
        finished = False
        meta = None
        media = []
        # get meta info first
        meta = json.loads(z.read("_meta"))
        # the "_usn" entry holds the next usn as text
        nextUsn = int(z.read("_usn"))
        # then loop through all files
        for i in z.infolist():
            if i.filename == "_meta" or i.filename == "_usn":
                # ignore previously-retrieved meta
            elif i.filename == "_finished":
                # last zip in set
                finished = True
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # save file
                open(name, "wb").write(data)
                # update db
                media.append((name, csum, self._mtime(name)))
                # remove entries from local log
                self.db.execute("delete from log where fname = ?", name)
        # update media db and note new starting usn
        if media:
            self.db.executemany("insert or replace into media values (?,?,?)",
        self.setUsn(nextUsn)  # commits
        # if we have finished adding, we need to record the new folder mtime
        # so that we don't trigger a needless scan
        if finished:
        return finished

    # Illegal characters

    _illegalCharReg = re.compile(r'[][><:"/?*^\\|\0]')

    def stripIllegal(self, str):
        """Return ``str`` with every match of ``_illegalCharReg`` removed."""
        illegal = self._illegalCharReg
        return illegal.sub("", str)

    def hasIllegal(self, str):
        """Tell whether ``str`` contains a match of ``_illegalCharReg``."""
        hit = re.search(self._illegalCharReg, str)
        return hit is not None

    # Media syncing - bundling zip files to send to server
    # Because there's no standard filename encoding for zips, and because not
    # all zip clients support retrieving mtime, we store the files as ascii
    # and place a json file in the zip with the necessary information.

    def zipAdded(self):
        "Add files to a zip until over SYNC_ZIP_SIZE/COUNT. Return zip data."
        # NOTE(review): the query's bound parameter, the loop exits (`break`)
        # and whatever fills fnames are missing in this copy, so the method
        # does not parse as written.
        f = StringIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)
        sz = 0
        cnt = 0
        # files maps the zip entry name (a counter as text) to the real name
        files = {}
        cur = self.db.execute("select fname from log where type = ?",
        fnames = []
        while 1:
            fname = cur.fetchone()
            if not fname:
                # add a flag so the server knows it can clean up
                z.writestr("_finished", "")
            fname = fname[0]
            z.write(fname, str(cnt))
            files[str(cnt)] = fname
            sz += os.path.getsize(fname)
            if sz > SYNC_ZIP_SIZE or cnt > SYNC_ZIP_COUNT:
            cnt += 1
        z.writestr("_meta", json.dumps(files))
        return f.getvalue(), fnames

    def forgetAdded(self, fnames):
        if not fnames:
        self.db.executemany("delete from log where fname = ?", fnames)

    # Tracking changes (private)

    def _initDB(self):
create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int, usn int); insert into meta values (0, 0);
create table log (fname text primary key, type int);

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        "Return checksum() of the bytes stored in the file at PATH."
        # context manager so the handle is closed even if read() fails
        with open(path, "rb") as fh:
            return checksum(fh.read())

    def usn(self):
        return self.db.scalar("select usn from meta")

    def setUsn(self, usn):
        self.db.execute("update meta set usn = ?", usn)

    def syncMod(self):
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes."
        if self._changed():

    def _logChanges(self):
        "Record the additions/removals found by _changes() in the media DB."
        (added, removed) = self._changes()
        log = []
        media = []
        mediaRem = []
        for f in added:
            mt = self._mtime(f)
            media.append((f, self._checksum(f), mt))
            log.append((f, MEDIA_ADD))
        for f in removed:
            mediaRem.append((f, ))
            log.append((f, MEDIA_REM))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?)",
                            media)
        if mediaRem:
            self.db.executemany("delete from media where fname = ?", mediaRem)
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
        # and logs
        self.db.executemany("insert or replace into log values (?,?)", log)

    def _changes(self):
        self.cache = {}
        for (name, csum, mod) in self.db.execute("select * from media"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            # ignore folders and thumbs.db
            if os.path.isdir(f):
            if f.lower() == "thumbs.db":
            # and files with invalid chars
            if self.hasIllegal(f):
            # empty files are invalid; clean them up and continue
            if not os.path.getsize(f):
            # newly added?
            if f not in self.cache:
                # modified since last time?
                if self._mtime(f) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(f) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in self.cache.items():
            if not v[2]:
        return added, removed

    def sanityCheck(self):
        assert not self.db.scalar("select count() from log")
        cnt = self.db.scalar("select count() from media")
        return cnt

    def forceResync(self):
        self.db.execute("delete from media")
        self.db.execute("delete from log")
        self.db.execute("update meta set usn = 0, dirMod = 0")

    def removeExisting(self, files):
        "Remove files from list of files to sync, and return missing files."
        need = []
        remove = []
        for f in files:
            if self.db.scalar("select 1 from log where fname=?", f):
                remove.append((f, ))
        self.db.executemany("delete from log where fname=?", remove)
        # if we need all the server files, it's faster to pass None than
        # the full list
        if need and len(files) == len(need):
            return None
        return need
Пример #8
class MediaManager(object):

    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        # unquoted case
        "(?i)(<img[^>]+src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        self.col = col
        if server:
            self._dir = None
        # media directory
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        # convert dir to unicode if it's not already
        if isinstance(self._dir, str):
            self._dir = unicode(self._dir, sys.getfilesystemencoding())
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        # change database

    def connect(self):
        if self.col.server:
        path = self.dir()+".db"
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def close(self):
        if self.col.server:
        self.db = None
        # change cwd back to old location
        if self._oldcwd:
                # may have been deleted

    def dir(self):
        return self._dir

    def _isFAT32(self):
        "Return True if the media folder's volume reports a FAT filesystem; else None."
        if not isWin:
            return
        # pywin32 only exists on Windows, hence the local import
        import win32api, win32file
        name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True

    # Adding media

    def addFile(self, opath):
        """Copy PATH to MEDIADIR, and return new filename.
If the same name exists, compare checksums."""
        mdir = self.dir()
        # remove any dangerous characters
        base = re.sub(r"[][<>:/\\&?\"\|]", "", os.path.basename(opath))
        (root, ext) = os.path.splitext(base)
        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n+1)
        # find the first available name
        while True:
            path = os.path.join(mdir, root + ext)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                shutil.copyfile(opath, path)
                return os.path.basename(os.path.basename(path))
            # if it's identical, reuse
            if self.filesIdentical(opath, path):
                return os.path.basename(path)
            # otherwise, increment the index in the filename
            reg = " \((\d+)\)$"
            if not re.search(reg, root):
                root = root + " (1)"
                root = re.sub(reg, repl, root)

    def filesIdentical(self, path1, path2):
        "True if files are the same."
        # read via context managers so neither handle is left open
        with open(path1, "rb") as first:
            sum1 = checksum(first.read())
        with open(path2, "rb") as second:
            sum2 = checksum(second.read())
        return sum1 == sum2

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        "Return the media filenames referenced in STRING for the model MID."
        l = []
        model = self.col.models.get(mid)
        if model['type'] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
        else:
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
                        l.append(fname)
        return l

    def _expandClozes(self, string):
        "Return one rendering of STRING per cloze number, plus the all-answers form."
        ords = set(re.findall("{{c(\d+)::.+?}}", string))
        strings = []
        from anki.template.template import clozeReg
        def qrepl(m):
            # question side: show the hint if there is one, else a placeholder
            if m.group(3):
                return "[%s]" % m.group(3)
            else:
                return "[...]"
        def arepl(m):
            return m.group(1)
        for ord in ords:
            s = re.sub(clozeReg%ord, qrepl, string)
            s = re.sub(clozeReg%".+?", "\\1", s)
            strings.append(s)
        strings.append(re.sub(clozeReg%".+?", arepl, string))
        return strings

    def transformNames(self, txt, func):
        for reg in self.regexps:
            txt = re.sub(reg, func, txt)
        return txt

    def strip(self, txt):
        for reg in self.regexps:
            txt = re.sub(reg, "", txt)
        return txt

    def escapeImages(self, string):
        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            return tag.replace(
                fname, urllib.quote(fname.encode("utf-8")))
        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(self, local=None):
        "Return (missingFiles, unusedFiles)."
        mdir = self.dir()
        # generate card q/a and look through all references
        normrefs = {}
        def norm(s):
            if isinstance(s, unicode) and isMac:
                return unicodedata.normalize('NFD', s)
            return s
        for f in self.allMedia():
            normrefs[norm(f)] = True
        # loop through directory and find unused & missing media
        unused = []
        if local is None:
            files = os.listdir(mdir)
        else:
            files = local
        for file in files:
            if not local:
                path = os.path.join(mdir, file)
                if not os.path.isfile(path):
                    # ignore directories
                    continue
                if file.startswith("_"):
                    # leading _ says to ignore file
                    continue
            nfile = norm(file)
            if nfile not in normrefs:
                unused.append(file)
            else:
                # referenced and present: whatever is left in normrefs at the
                # end is missing from disk
                del normrefs[nfile]
        nohave = [x for x in normrefs.keys() if not x.startswith("_")]
        return (nohave, unused)

    def allMedia(self):
        "Return a set of all referenced filenames."
        files = set()
        for mid, flds in self.col.db.execute("select mid, flds from notes"):
            for f in self.filesInStr(mid, flds):
        return files

    # Copying on import

    def have(self, fname):
        return os.path.exists(os.path.join(self.dir(), fname))

    # Media syncing - changes and removal

    def hasChanged(self):
        return self.db.scalar("select 1 from log limit 1")

    def removed(self):
        "Return db.list() over the log rows whose type is MEDIA_REM."
        query = "select * from log where type = ?"
        return self.db.list(query, MEDIA_REM)

    def syncRemove(self, fnames):
        "Delete FNAMES from disk and from the media/log tables."
        # remove provided deletions
        for f in fnames:
            if os.path.exists(f):
                os.unlink(f)
            self.db.execute("delete from log where fname = ?", f)
            self.db.execute("delete from media where fname = ?", f)
        # and all locally-logged deletions, as server has acked them
        self.db.execute("delete from log where type = ?", MEDIA_REM)

    # Media syncing - unbundling zip files from server

    def syncAdd(self, zipData):
        "Extract zip data; true if finished."
        f = StringIO(zipData)
        z = zipfile.ZipFile(f, "r")
        finished = False
        media = []
        sizecnt = 0
        # get meta info first
        assert z.getinfo("_meta").file_size < 100000
        meta = json.loads(z.read("_meta"))
        nextUsn = int(z.read("_usn"))
        # then loop through all files
        for i in z.infolist():
            # check for zip bombs
            sizecnt += i.file_size
            assert sizecnt < 100*1024*1024
            if i.filename == "_meta" or i.filename == "_usn":
                # ignore previously-retrieved meta
                continue
            elif i.filename == "_finished":
                # last zip in set
                finished = True
            else:
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # can we store the file on this system?
                if self.illegal(name):
                    continue
                # NOTE(review): name comes from the zip's _meta and is only
                # vetted by illegal() before being used as a path
                # save file
                with open(name, "wb") as out:
                    out.write(data)
                # update db
                media.append((name, csum, self._mtime(name)))
                # remove entries from local log
                self.db.execute("delete from log where fname = ?", name)
        # update media db and note new starting usn
        if media:
            self.db.executemany(
                "insert or replace into media values (?,?,?)", media)
        self.setUsn(nextUsn) # commits
        # if we have finished adding, we need to record the new folder mtime
        # so that we don't trigger a needless scan
        if finished:
            self.syncMod()
        return finished

    def illegal(self, f):
        "Return True if F holds a character in the isWin/isMac banned set; else None."
        if isWin:
            banned = "<>:\"/\\|?*^"
        elif isMac:
            banned = ":\\/"
        else:
            # no restrictions are checked on other platforms
            return None
        for ch in f:
            if ch in banned:
                return True

    # Media syncing - bundling zip files to send to server
    # Because there's no standard filename encoding for zips, and because not
    # all zip clients support retrieving mtime, we store the files as ascii
    # and place a json file in the zip with the necessary information.

    def zipAdded(self):
        "Add files to a zip until over SYNC_ZIP_SIZE/COUNT. Return (zip data, fnames)."
        f = StringIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)
        sz = 0
        cnt = 0
        files = {}
        cur = self.db.execute(
            "select fname from log where type = ?", MEDIA_ADD)
        # one-element rows, in the shape forgetAdded() hands to executemany()
        fnames = []
        while 1:
            fname = cur.fetchone()
            if not fname:
                # add a flag so the server knows it can clean up
                z.writestr("_finished", "")
                break
            fname = fname[0]
            fnames.append([fname])
            # members are stored under an ascii counter; _meta maps them back
            z.write(fname, str(cnt))
            files[str(cnt)] = fname
            sz += os.path.getsize(fname)
            if sz > SYNC_ZIP_SIZE or cnt > SYNC_ZIP_COUNT:
                # this zip is full; the rest is left for a later call
                break
            cnt += 1
        z.writestr("_meta", json.dumps(files))
        # closing writes the central directory; without it the data is not a
        # readable archive
        z.close()
        return f.getvalue(), fnames

    def forgetAdded(self, fnames):
        if not fnames:
        self.db.executemany("delete from log where fname = ?", fnames)

    # Tracking changes (private)

    def _initDB(self):
create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int, usn int); insert into meta values (0, 0);
create table log (fname text primary key, type int);

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        "Return checksum() of the bytes stored in the file at PATH."
        # context manager so the handle is closed even if read() fails
        with open(path, "rb") as fh:
            return checksum(fh.read())

    def usn(self):
        return self.db.scalar("select usn from meta")

    def setUsn(self, usn):
        self.db.execute("update meta set usn = ?", usn)

    def syncMod(self):
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes."
        if self._changed():

    def _logChanges(self):
        "Record the additions/removals found by _changes() in the media DB."
        (added, removed) = self._changes()
        log = []
        media = []
        mediaRem = []
        for f in added:
            mt = self._mtime(f)
            media.append((f, self._checksum(f), mt))
            log.append((f, MEDIA_ADD))
        for f in removed:
            # without this row the delete below would never run
            mediaRem.append((f, ))
            log.append((f, MEDIA_REM))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?)",
                            media)
        if mediaRem:
            self.db.executemany("delete from media where fname = ?",
                                mediaRem)
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
        # and logs
        self.db.executemany("insert or replace into log values (?,?)", log)

    def _changes(self):
        self.cache = {}
        for (name, csum, mod) in self.db.execute(
            "select * from media"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            # ignore folders and thumbs.db
            if os.path.isdir(f):
            if f.lower() == "thumbs.db":
            # and files with invalid chars
            bad = False
            for c in "\0", "/", "\\", ":":
                if c in f:
                    bad = True
            if bad:
            # empty files are invalid; clean them up and continue
            if not os.path.getsize(f):
            # newly added?
            if f not in self.cache:
                # modified since last time?
                if self._mtime(f) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(f) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in self.cache.items():
            if not v[2]:
        return added, removed

    def sanityCheck(self):
        assert not self.db.scalar("select count() from log")
        cnt = self.db.scalar("select count() from media")
        return cnt

    def forceResync(self):
        self.db.execute("delete from media")
        self.db.execute("delete from log")
        self.db.execute("update meta set usn = 0, dirMod = 0")

    def removeExisting(self, files):
        "Remove files from list of files to sync, and return missing files."
        need = []
        remove = []
        for f in files:
            if self.db.execute("select 1 from log where fname=?", f):
        self.db.executemany("delete from log where fname=?", remove)
        # if we need all the server files, it's faster to pass None than
        # the full list
        if need and len(files) == len(need):
            return None
        return need
Пример #9
class MediaManager(object):

    soundRegexps = ["(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        "(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        "(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        self.col = col
        if server:
            self._dir = None
        # media directory
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        # convert dir to unicode if it's not already
        if isinstance(self._dir, str):
            self._dir = unicode(self._dir, sys.getfilesystemencoding())
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        except OSError:
            raise Exception("invalidTempFolder")
        # change database

    def connect(self):
        if self.col.server:
        path = self.dir()+".db2"
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def _initDB(self):
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self):
        # If the old-format "<media dir>.db" file exists, attach it as "old",
        # copy its media/log/meta rows into the current tables, detach it and
        # rename it to collection.media.db.old. A failed import is only logged.
        # NOTE(review): this method is truncated as it stands -- the `try:`
        # line, the execute(...) wrappers around the SQL text below, whatever
        # joins the two selects, and the body of the final `if` are missing.
        # Restore them from the original source before relying on this code.
        oldpath = self.dir()+".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception, e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:"+traceback.format_exc())
            self.db.execute("detach old")
            npath = "../collection.media.db.old"
            if os.path.exists(npath):
            os.rename("../collection.media.db", npath)
Пример #10
class MediaManager(object):

    # other code depends on this order, so don't reorder
    regexps = ("(?i)(\[sound:([^]]+)\])",

    def __init__(self, col):
        "Create the media folder for COL if needed, chdir into it and open the DB."
        self.col = col
        # media directory
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        if not os.path.exists(self._dir):
            os.makedirs(self._dir)
        self._oldcwd = os.getcwd()
        # the rest of the class addresses media files by bare filename
        os.chdir(self._dir)
        # change database
        self.connect()

    def connect(self):
        if self.col.server:
        path = self.dir() + ".db"
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:

    def close(self):
        if self.col.server:
        self.db = None
        # change cwd back to old location

    def dir(self):
        return self._dir

    # Adding media

    def addFile(self, opath):
        """Copy PATH to MEDIADIR, and return new filename.
If the same name exists, compare checksums."""
        mdir = self.dir()
        # remove any dangerous characters
        base = re.sub(r"[][<>:/\\&]", "", os.path.basename(opath))
        dst = os.path.join(mdir, base)
        # if it doesn't exist, copy it directly
        if not os.path.exists(dst):
            shutil.copy2(opath, dst)
            return base
        # if it's identical, reuse
        if self.filesIdentical(opath, dst):
            return base
        # otherwise, find a unique name
        (root, ext) = os.path.splitext(base)

        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n + 1)

        while True:
            path = os.path.join(mdir, root + ext)
            if not os.path.exists(path):
            reg = " \((\d+)\)$"
            if not re.search(reg, root):
                root = root + " (1)"
                root = re.sub(reg, repl, root)
        # copy and return
        shutil.copy2(opath, path)
        return os.path.basename(os.path.basename(path))

    def filesIdentical(self, path1, path2):
        "True if files are the same."
        # read via context managers so neither handle is left open
        with open(path1, "rb") as first:
            sum1 = checksum(first.read())
        with open(path2, "rb") as second:
            sum2 = checksum(second.read())
        return sum1 == sum2

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        "Return the media filenames referenced in STRING for the model MID."
        l = []
        # convert latex first
        model = self.col.models.get(mid)
        string = mungeQA(string, None, None, model, None, self.col)
        # extract filenames
        for reg in self.regexps:
            for (full, fname) in re.findall(reg, string):
                isLocal = not re.match("(https?|ftp)://", fname.lower())
                if isLocal or includeRemote:
                    l.append(fname)
        return l

    def strip(self, txt):
        for reg in self.regexps:
            txt = re.sub(reg, "", txt)
        return txt

    def escapeImages(self, string):
        "Percent-escape (as utf8) local image filenames in STRING; no-op on Windows."
        # Webkit may fail to find images when fed unicode paths, so on
        # linux/osx the paths are percent-escaped as utf8. On Windows that
        # fixes some images but breaks others, and names normalized by
        # dropbox become unreadable once escaped, so nothing is changed there.
        if isWin:
            return string

        def quoteLocal(m):
            tag, fname = m.group(1), m.group(2)
            if re.match("(https?|ftp)://", fname):
                # remote references are returned untouched
                return tag
            escaped = urllib.quote(fname.encode("utf-8"))
            return tag.replace(fname, escaped)

        imgPattern = self.regexps[1]
        return re.sub(imgPattern, quoteLocal, string)

    # Rebuilding DB

    def check(self, local=None):
        "Return (missingFiles, unusedFiles)."
        mdir = self.dir()
        # generate card q/a and look through all references
        normrefs = {}

        def norm(s):
            if isinstance(s, unicode):
                return unicodedata.normalize('NFD', s)
            return s

        for f in self.allMedia():
            normrefs[norm(f)] = True
        # loop through directory and find unused & missing media
        unused = []
        if local is None:
            files = os.listdir(mdir)
        else:
            files = local
        for file in files:
            if not local:
                path = os.path.join(mdir, file)
                if not os.path.isfile(path):
                    # ignore directories
                    continue
            nfile = norm(file)
            if nfile not in normrefs:
                unused.append(file)
            else:
                # referenced and present: whatever is left in normrefs at the
                # end is missing from disk
                del normrefs[nfile]
        nohave = normrefs.keys()
        return (nohave, unused)

    def allMedia(self):
        "Return a set of all referenced filenames."
        files = set()
        for mid, flds in self.col.db.execute("select mid, flds from notes"):
            for f in self.filesInStr(mid, flds):
        return files

    # Copying on import
    # FIXME: check if the files are actually identical, and rewrite references
    # if necessary

    def copyTo(self, rdir):
        "Copy media to RDIR. Return number of files copied."
        ldir = self.dir()
        if not os.path.exists(ldir):
            return 0
        cnt = 0
        for f in os.listdir(ldir):
            src = os.path.join(ldir, f)
            dst = os.path.join(rdir, f)
            if not os.path.exists(dst):
                shutil.copy2(src, dst)
            cnt += 1
        return cnt

    # Media syncing - changes and removal

    def hasChanged(self):
        return self.db.scalar("select 1 from log limit 1")

    def removed(self):
        "Return db.list() over the log rows whose type is MEDIA_REM."
        query = "select * from log where type = ?"
        return self.db.list(query, MEDIA_REM)

    def syncRemove(self, fnames):
        "Delete FNAMES from disk and from the media/log tables."
        # remove provided deletions
        for f in fnames:
            if os.path.exists(f):
                os.unlink(f)
            self.db.execute("delete from log where fname = ?", f)
            self.db.execute("delete from media where fname = ?", f)
        # and all locally-logged deletions, as server has acked them
        self.db.execute("delete from log where type = ?", MEDIA_REM)

    # Media syncing - unbundling zip files from server

    def syncAdd(self, zipData):
        "Extract zip data; true if finished."
        f = StringIO(zipData)
        z = zipfile.ZipFile(f, "r")
        finished = False
        media = []
        sizecnt = 0
        # get meta info first
        assert z.getinfo("_meta").file_size < 100000
        meta = simplejson.loads(z.read("_meta"))
        nextUsn = int(z.read("_usn"))
        # then loop through all files
        for i in z.infolist():
            # check for zip bombs
            sizecnt += i.file_size
            assert sizecnt < 100 * 1024 * 1024
            if i.filename == "_meta" or i.filename == "_usn":
                # ignore previously-retrieved meta
                continue
            elif i.filename == "_finished":
                # last zip in set
                finished = True
            else:
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # can we store the file on this system?
                # (test the real filename: the zip member name is only the
                # key that _meta is indexed by)
                if self.illegal(name):
                    continue
                # NOTE(review): name comes from the zip's _meta and is only
                # vetted by illegal() before being used as a path
                # save file
                with open(name, "wb") as out:
                    out.write(data)
                # update db
                media.append((name, csum, self._mtime(name)))
                # remove entries from local log
                self.db.execute("delete from log where fname = ?", name)
        # update media db and note new starting usn
        if media:
            self.db.executemany("insert or replace into media values (?,?,?)",
                                media)
        self.setUsn(nextUsn)  # commits
        # if we have finished adding, we need to record the new folder mtime
        # so that we don't trigger a needless scan
        if finished:
            self.syncMod()
        return finished

    def illegal(self, f):
        "Return True if F holds a character in the isWin/isMac banned set; else None."
        if isWin:
            banned = "<>:\"/\\|?*^"
        elif isMac:
            banned = ":\\/"
        else:
            # no restrictions are checked on other platforms
            return None
        for ch in f:
            if ch in banned:
                return True

    # Media syncing - bundling zip files to send to server
    # Because there's no standard filename encoding for zips, and because not
    # all zip clients support retrieving mtime, we store the files as ascii
    # and place a json file in the zip with the necessary information.

    def zipAdded(self):
        "Add files to a zip until over SYNC_ZIP_SIZE. Return (zip data, fnames)."
        f = StringIO()
        z = zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED)
        sz = 0
        cnt = 0
        files = {}
        cur = self.db.execute("select fname from log where type = ?",
                              MEDIA_ADD)
        # one-element rows, in the shape forgetAdded() hands to executemany()
        fnames = []
        while 1:
            fname = cur.fetchone()
            if not fname:
                # add a flag so the server knows it can clean up
                z.writestr("_finished", "")
                break
            fname = fname[0]
            fnames.append([fname])
            # members are stored under an ascii counter; _meta maps them back
            z.write(fname, str(cnt))
            files[str(cnt)] = fname
            sz += os.path.getsize(fname)
            if sz > SYNC_ZIP_SIZE:
                # this zip is full; the rest is left for a later call
                break
            cnt += 1
        z.writestr("_meta", simplejson.dumps(files))
        # closing writes the central directory; without it the data is not a
        # readable archive
        z.close()
        return f.getvalue(), fnames

    def forgetAdded(self, fnames):
        if not fnames:
        self.db.executemany("delete from log where fname = ?", fnames)

    # Tracking changes (private)

    def _initDB(self):
create table media (fname text primary key, csum text, mod int);
create table meta (dirMod int, usn int); insert into meta values (0, 0);
create table log (fname text primary key, type int);

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        "Return checksum() of the bytes stored in the file at PATH."
        # context manager so the handle is closed even if read() fails
        with open(path, "rb") as fh:
            return checksum(fh.read())

    def usn(self):
        return self.db.scalar("select usn from meta")

    def setUsn(self, usn):
        self.db.execute("update meta set usn = ?", usn)

    def syncMod(self):
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if mod and mod == mtime:
            return False
        return mtime

    def findChanges(self):
        "Scan the media folder if it's changed, and note any changes."
        if self._changed():

    def _logChanges(self):
        "Record the additions/removals found by _changes() in the media DB."
        (added, removed) = self._changes()
        log = []
        media = []
        mediaRem = []
        for f in added:
            mt = self._mtime(f)
            media.append((f, self._checksum(f), mt))
            log.append((f, MEDIA_ADD))
        for f in removed:
            mediaRem.append((f, ))
            log.append((f, MEDIA_REM))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?)",
                            media)
        if mediaRem:
            self.db.executemany("delete from media where fname = ?", mediaRem)
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))
        # and logs
        self.db.executemany("insert or replace into log values (?,?)", log)

    def _changes(self):
        self.cache = {}
        for (name, csum, mod) in self.db.execute("select * from media"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            # ignore folders
            if os.path.isdir(f):
            # newly added?
            if f not in self.cache:
                # modified since last time?
                if self._mtime(f) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(f) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in self.cache.items():
            if not v[2]:
        return added, removed

    def sanityCheck(self):
        assert not self.db.scalar("select count() from log")
        cnt = self.db.scalar("select count() from media")
        return cnt
Пример #11
class MediaManager:

    # patterns that locate media references; each exposes the referenced
    # name through the named group "fname"
    soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        r"(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        r"(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    ]
    regexps = soundRegexps + imgRegexps

    def __init__(self, col, server):
        """Store the collection and derive the media folder path.

        The folder path is the collection path with a trailing ``.anki2``
        (case-insensitive) replaced by ``.media``.
        """
        # NOTE(review): statements appear to be missing from this method --
        # the server branch falls through to the reassignment of _dir, and
        # nothing follows the os.path.exists() test or the "change database"
        # comment.  Restore from the upstream source before relying on it.
        self.col = col
        if server:
            self._dir = None
        # media directory
        self._dir = re.sub("(?i)\.(anki2)$", ".media", self.col.path)
        if not os.path.exists(self._dir):
        # change database

    def connect(self):
        """Open the media DB stored at ``<media dir>.db2``.

        Does nothing in server mode.  When the DB file did not exist before
        opening, the schema is created through ``_initDB()``.
        """
        if self.col.server:
            return
        path = self.dir()+".db2"
        # must be evaluated before DB(path) is opened
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:
            self._initDB()

    def _initDB(self):
        """Create the schema of a fresh media DB (media and meta tables)."""
        # NOTE(review): the SQL below is not wrapped in any call or string
        # literal, and the `create table media` statement is never closed, so
        # this method does not parse as written -- restore from upstream.
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self):
        """Import rows from an old-format media DB (``<media dir>.db``) if one exists.

        The old DB is attached as ``old``, its media/meta rows are copied
        over, and the old file is then renamed to ``collection.media.db.old``.
        """
        # NOTE(review): this method is damaged -- the SQL statements have
        # lost the calls that wrapped them, the `try:` matching the `except`
        # below is gone, and the `if os.path.exists(npath):` test has no
        # body.  Restore from upstream.
        oldpath = self.dir()+".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception as e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:"+traceback.format_exc())
            self.db.execute("detach old")
            npath = os.path.join(self.dir(), "collection.media.db.old")
            if os.path.exists(npath):
            os.rename(os.path.join(self.dir(), "collection.media.db"), npath)

    def close(self):
        """Drop the reference to the media DB; a no-op in server mode."""
        if self.col.server:
            return
        # NOTE(review): the handle is only dereferenced here, not explicitly
        # closed -- confirm whether DB needs a close() call first.
        self.db = None

    def dir(self):
        return self._dir

    def _isFAT32(self):
        """Return True when the media folder's volume reports a name starting with "fat".

        The value tested is element 4 of ``win32api.GetVolumeInformation()``
        (presumably the file-system name -- confirm against the pywin32 docs).
        """
        # NOTE(review): lines are missing here -- the `if not isWin:` test has
        # no body and the indented volume lookup has lost its enclosing
        # statement (likely a try/except, judging by the comment about
        # network drives).  Restore from upstream.
        if not isWin:
        import win32api, win32file
            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
            # mapped & unmapped network drive; pray that it's not vfat
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True

    # Adding media
    # opath must be in unicode

    def addFile(self, opath):
        """Store the file at ``opath`` in the media folder via ``writeData``.

        Returns whatever ``writeData`` returns for the file's contents.
        """
        # read through a context manager so the handle is closed promptly
        # instead of whenever the garbage collector gets to it
        with open(opath, "rb") as f:
            data = f.read()
        return self.writeData(opath, data)

    def writeData(self, opath, data):
        """Store ``data`` in the media folder and return the filename used.

        The name is the basename of ``opath``, NFC-normalized and stripped of
        illegal characters.  An existing file with identical contents is
        reused; otherwise " (1)" is appended to the stem, and the number is
        incremented until a free or identical name is found.
        """
        # if fname is a full path, use only the basename
        fname = os.path.basename(opath)
        # make sure we write it in NFC form (on mac will autoconvert to NFD),
        # and return an NFC-encoded reference
        fname = unicodedata.normalize("NFC", fname)
        # remove any dangerous characters
        base = self.stripIllegal(fname)
        (root, ext) = os.path.splitext(base)
        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n+1)
        # find the first available name
        csum = checksum(data)
        reg = r" \((\d+)\)$"
        while True:
            fname = root + ext
            path = os.path.join(self.dir(), fname)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                with open(path, "wb") as f:
                    f.write(data)
                return fname
            # if it's identical, reuse
            with open(path, "rb") as f:
                if checksum(f.read()) == csum:
                    return fname
            # otherwise, increment the index in the filename.  The two
            # branches must be exclusive: without the else a suffixed name
            # was never incremented again and the loop could not terminate.
            if not re.search(reg, root):
                root = root + " (1)"
            else:
                root = re.sub(reg, repl, root)

    # String manipulation

    def filesInStr(self, mid, string, includeRemote=False):
        """Return the media filenames referenced in ``string``.

        ``mid`` selects the note type; for cloze note types containing
        "{{c" every cloze expansion of the text is scanned.  References
        starting with http://, https:// or ftp:// are only included when
        ``includeRemote`` is true.
        """
        l = []
        model = self.col.models.get(mid)
        if model['type'] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
        else:
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
                        l.append(fname)
        return l

    def _expandClozes(self, string):
        """Return the renderings of ``string`` needed to cover all its clozes.

        For every cloze number one rendering is produced with that cloze
        replaced by "[hint]" (or "[...]" without a hint) and the remaining
        clozes by their text; a final rendering has every cloze replaced by
        its text.
        """
        ords = set(re.findall(r"{{c(\d+)::.+?}}", string))
        strings = []
        from anki.template.template import clozeReg
        def qrepl(m):
            if m.group(3):
                return "[%s]" % m.group(3)
            else:
                return "[...]"
        def arepl(m):
            return m.group(1)
        for n in ords:
            s = re.sub(clozeReg%n, qrepl, string)
            s = re.sub(clozeReg%".+?", "\\1", s)
            # keep the per-cloze rendering; it was computed but discarded
            strings.append(s)
        strings.append(re.sub(clozeReg%".+?", arepl, string))
        return strings

    def transformNames(self, txt, func):
        """Run ``re.sub`` with ``func`` over ``txt`` for each pattern in ``regexps``."""
        result = txt
        for pattern in self.regexps:
            result = re.sub(pattern, func, result)
        return result

    def strip(self, txt):
        """Return ``txt`` with everything matched by ``regexps`` removed."""
        remaining = txt
        for pattern in self.regexps:
            remaining = re.sub(pattern, "", remaining)
        return remaining

    def escapeImages(self, string, unescape=False):
        """URL-quote the filenames of image tags in ``string``.

        With ``unescape`` true the filenames are unquoted instead.
        References starting with http://, https:// or ftp:// are left
        untouched.
        """
        # the two assignments must be alternatives; without the else the
        # function always quoted when unescaping and failed otherwise
        fn = urllib.parse.unquote if unescape else urllib.parse.quote
        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            return tag.replace(fname, fn(fname))
        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(self, local=None):
        """Return (missingFiles, unusedFiles, invalidFiles).

        ``local`` optionally supplies the list of filenames to examine
        instead of listing the media folder.
        """
        # NOTE(review): this method is damaged -- several `if` tests have no
        # body, `else:` lines are gone (e.g. `files` is always overwritten
        # with `local`), and nothing ever adds to allRefs, unused or invalid.
        # Restore from upstream before use.
        mdir = self.dir()
        # gather all media references in NFC form
        allRefs = set()
        for nid, mid, flds in self.col.db.execute("select id, mid, flds from notes"):
            noteRefs = self.filesInStr(mid, flds)
            # check the refs are in NFC
            for f in noteRefs:
                # if they're not, we'll need to fix them first
                if f != unicodedata.normalize("NFC", f):
                    noteRefs = self.filesInStr(mid, flds)
        # loop through media folder
        unused = []
        invalid = []
        if local is None:
            files = os.listdir(mdir)
            files = local
        renamedFiles = False
        for file in files:
            path = os.path.join(self.dir(), file)
            if not local:
                if not os.path.isfile(path):
                    # ignore directories
            if file.startswith("_"):
                # leading _ says to ignore file
            nfcFile = unicodedata.normalize("NFC", file)
            nfcPath = os.path.join(self.dir(), nfcFile)
            # we enforce NFC fs encoding on non-macs; on macs we'll have gotten
            # NFD so we use the above variable for comparing references
            if not isMac and not local:
                if file != nfcFile:
                    # delete if we already have the NFC form, otherwise rename
                    if os.path.exists(nfcPath):
                        renamedFiles = True
                        os.rename(path, nfcPath)
                        renamedFiles = True
                    file = nfcFile
            # compare
            if nfcFile not in allRefs:
        # if we renamed any files to nfc format, we must rerun the check
        # to make sure the renamed files are not marked as unused
        if renamedFiles:
            return self.check(local=local)
        nohave = [x for x in allRefs if not x.startswith("_")]
        return (nohave, unused, invalid)

    def _normalizeNoteRefs(self, nid):
        note = self.col.getNote(nid)
        for c, fld in enumerate(note.fields):
            nfc = unicodedata.normalize("NFC", fld)
            if nfc != fld:
                note.fields[c] = nfc

    # Copying on import

    def have(self, fname):
        """Return True if ``fname`` exists inside the media folder."""
        candidate = os.path.join(self.dir(), fname)
        return os.path.exists(candidate)

    # Illegal characters

    # characters that stripIllegal() removes and hasIllegal() reports
    _illegalCharReg = re.compile(r'[][><:"/?*^\\|\0\r\n]')

    def stripIllegal(self, str):
        """Return ``str`` with every character matched by ``_illegalCharReg`` removed."""
        return self._illegalCharReg.sub("", str)

    def hasIllegal(self, str):
        """Return True if ``str`` contains a character matched by ``_illegalCharReg``."""
        found = self._illegalCharReg.search(str)
        return found is not None

    # Tracking changes

    def findChanges(self):
        """Scan the media folder if it's changed, and note any changes.

        Nothing is done when ``_changed()`` returns a false value.
        """
        # the branch had no body, which left the method unparseable; the
        # recording step lives in _logChanges()
        if self._changed():
            self._logChanges()

    def haveDirty(self):
        """Return the DB's answer to whether any media row has dirty=1."""
        query = "select 1 from media where dirty=1 limit 1"
        return self.db.scalar(query)

    def _mtime(self, path):
        return int(os.stat(path).st_mtime)

    def _checksum(self, path):
        """Return ``checksum()`` of the contents of the file at ``path``."""
        # a context manager closes the handle deterministically; the bare
        # open(...).read() left that to the garbage collector
        with open(path, "rb") as f:
            return checksum(f.read())

    def _changed(self):
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def _logChanges(self):
        (added, removed) = self._changes()
        media = []
        for f in added:
            path = os.path.join(self.dir(), f)
            mt = self._mtime(path)
            media.append((f, self._checksum(path), mt, 1))
        for f in removed:
            media.append((f, None, 0, 1))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?,?)",
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changes(self):
        """Compare the media folder with the media DB and return (added, removed).

        ``self.cache`` is rebuilt as name -> [csum, mtime, seen-on-disk flag]
        from the media rows whose csum is not null.
        """
        # NOTE(review): this method is damaged -- most `if` tests below have
        # lost their bodies (presumably `continue`/append statements) and the
        # `else:` of the "newly added?" test is gone, so nothing is ever added
        # to `added` or `removed`.  Restore from upstream before use.
        self.cache = {}
        for (name, csum, mod) in self.db.execute(
            "select fname, csum, mtime from media where csum is not null"):
            self.cache[name] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        for f in os.listdir(self.dir()):
            path = os.path.join(self.dir(), f)
            # ignore folders and thumbs.db
            if os.path.isdir(path):
            if f.lower() == "thumbs.db":
            # and files with invalid chars
            if self.hasIllegal(f):
            # empty files are invalid; clean them up and continue
            sz = os.path.getsize(path)
            if not sz:
            if sz > 100*1024*1024:
                self.col.log("ignoring file over 100MB", f)
            # check encoding
            if not isMac:
                normf = unicodedata.normalize("NFC", f)
                normpath = os.path.join(self.dir(), normf)
                if f != normf:
                    # wrong filename encoding which will cause sync errors
                    if os.path.exists(normpath):
                        os.rename(path, normpath)
            # newly added?
            if f not in self.cache:
                # modified since last time?
                if self._mtime(path) != self.cache[f][1]:
                    # and has different checksum?
                    if self._checksum(path) != self.cache[f][0]:
                # mark as used
                self.cache[f][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in list(self.cache.items()):
            if not v[2]:
        return added, removed

    # Syncing-related

    def lastUsn(self):
        """Return the ``lastUsn`` value stored in the meta table."""
        query = "select lastUsn from meta"
        return self.db.scalar(query)

    def setLastUsn(self, usn):
        """Store ``usn`` as ``lastUsn`` in the meta table."""
        statement = "update meta set lastUsn = ?"
        self.db.execute(statement, usn)

    def syncInfo(self, fname):
        """Return the (csum, dirty) row for ``fname``, or (None, 0) without one."""
        row = self.db.first(
            "select csum, dirty from media where fname=?", fname)
        if row:
            return row
        return (None, 0)

    def markClean(self, fnames):
        """Reset the dirty flag of every media row named in ``fnames``."""
        for fname in fnames:
            # the opening of this call was missing, leaving a dangling
            # argument list
            self.db.execute(
                "update media set dirty=0 where fname=?", fname)

    def syncDelete(self, fname):
        """Delete ``fname`` from the media folder (if present) and from the media DB."""
        path = os.path.join(self.dir(), fname)
        # the existence test had no body; removing the file is the only
        # action consistent with the row deletion below
        if os.path.exists(path):
            os.unlink(path)
        self.db.execute("delete from media where fname=?", fname)

    def mediaCount(self):
        """Return the number of media rows whose csum is not null."""
        query = "select count() from media where csum is not null"
        return self.db.scalar(query)

    def dirtyCount(self):
        """Return the number of media rows flagged dirty."""
        query = "select count() from media where dirty=1"
        return self.db.scalar(query)

    def forceResync(self):
        """Empty the media table and zero ``lastUsn`` and ``dirMod`` in meta."""
        for statement in ("delete from media",
                          "update meta set lastUsn=0,dirMod=0"):
            self.db.execute(statement)

    # Media syncing: zips

    def mediaChangesZip(self):
        """Bundle pending (dirty) media changes into a zip archive.

        At most ``SYNC_ZIP_COUNT`` rows are read, and packing stops once the
        added files total ``SYNC_ZIP_SIZE`` bytes or more.  Returns
        ``(zip bytes, fnames)`` where ``fnames`` lists the DB names handled.
        The archive holds each added file under its row index plus a
        ``_meta`` JSON entry.
        """
        f = io.BytesIO()
        fnames = []
        # meta is list of (fname, zipname), where an empty zipname
        # is a deleted file
        meta = []
        sz = 0

        # the context manager finalizes the archive; without a close the
        # returned bytes have no central directory
        with zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED) as z:
            for c, (fname, csum) in enumerate(self.db.execute(
                            "select fname, csum from media where dirty=1"
                            " limit %d"%SYNC_ZIP_COUNT)):

                # NOTE(review): fnames was returned but never filled in;
                # recording the DB name is assumed -- confirm with callers.
                fnames.append(fname)
                path = os.path.join(self.dir(), fname)
                normname = unicodedata.normalize("NFC", fname)

                if csum:
                    self.col.log("+media zip", fname)
                    z.write(path, str(c))
                    meta.append((normname, str(c)))
                    sz += os.path.getsize(path)
                else:
                    self.col.log("-media zip", fname)
                    meta.append((normname, ""))

                if sz >= SYNC_ZIP_SIZE:
                    break

            z.writestr("_meta", json.dumps(meta))
        return f.getvalue(), fnames

    def addFilesFromZip(self, zipData):
        """Extract the files in ``zipData`` into the media folder.

        Each entry other than ``_meta`` is written under the name that
        ``_meta`` maps its zip name to, and recorded in the media DB as
        clean.  Returns the number of files written.
        """
        f = io.BytesIO(zipData)
        media = []
        cnt = 0
        with zipfile.ZipFile(f, "r") as z:
            # get meta info first
            meta = json.loads(z.read("_meta").decode("utf8"))
            # then loop through all files
            for i in z.infolist():
                if i.filename == "_meta":
                    # ignore previously-retrieved meta
                    continue
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # normalize name for platform
                if isMac:
                    name = unicodedata.normalize("NFD", name)
                else:
                    name = unicodedata.normalize("NFC", name)
                # save file
                # NOTE(review): name comes from the archive and is joined
                # to the media dir unchecked -- confirm it is trusted.
                path = os.path.join(self.dir(), name)
                with open(path, "wb") as out:
                    out.write(data)
                # update db
                media.append((name, csum, self._mtime(path), 0))
                cnt += 1
        if media:
            self.db.executemany(
                "insert or replace into media values (?,?,?,?)", media)
        return cnt
Пример #12
class MediaManager:

    # patterns that locate media references; each exposes the referenced
    # name through the named group "fname"
    soundRegexps = [r"(?i)(\[sound:(?P<fname>[^]]+)\])"]
    imgRegexps = [
        # src element quoted case
        r"(?i)(<img[^>]* src=(?P<str>[\"'])(?P<fname>[^>]+?)(?P=str)[^>]*>)",
        # unquoted case
        r"(?i)(<img[^>]* src=(?!['\"])(?P<fname>[^ >]+)[^>]*?>)",
    ]
    regexps = soundRegexps + imgRegexps
    db: Optional[DB]

    def __init__(self, col, server: bool) -> None:
        """Store the collection and derive the media folder path.

        The folder path is the collection path with a trailing ``.anki2``
        (case-insensitive) replaced by ``.media``.
        """
        # NOTE(review): this method is damaged -- the server branch falls
        # through, both `except OSError` clauses have lost their `try:`
        # blocks, and nothing follows the "change database" comment.
        # Restore from upstream before relying on it.
        self.col = col
        if server:
            self._dir = None
        # media directory
        self._dir = re.sub(r"(?i)\.(anki2)$", ".media", self.col.path)
        if not os.path.exists(self._dir):
            self._oldcwd = os.getcwd()
        except OSError:
            # cwd doesn't exist
            self._oldcwd = None
        except OSError:
            raise Exception("invalidTempFolder")
        # change database

    def connect(self) -> None:
        """Open the media DB stored at ``<media dir>.db2``.

        Does nothing in server mode.  When the DB file did not exist before
        opening, the schema is created through ``_initDB()``.
        """
        if self.col.server:
            return
        path = self.dir() + ".db2"
        # must be evaluated before DB(path) is opened
        create = not os.path.exists(path)
        self.db = DB(path)
        if create:
            self._initDB()

    def _initDB(self) -> None:
        """Create the schema of a fresh media DB (media and meta tables)."""
        # NOTE(review): the SQL below is not wrapped in any call or string
        # literal, and the `create table media` statement is never closed, so
        # this method does not parse as written -- restore from upstream.
create table media (
 fname text not null primary key,
 csum text,           -- null indicates deleted file
 mtime int not null,  -- zero if deleted
 dirty int not null

create index idx_media_dirty on media (dirty);

create table meta (dirMod int, lastUsn int); insert into meta values (0, 0);

    def maybeUpgrade(self) -> None:
        """Import rows from an old-format media DB (``<media dir>.db``) if one exists.

        The old DB is attached as ``old``, its media/meta rows are copied
        over, and the old file is then renamed to
        ``../collection.media.db.old``.
        """
        # NOTE(review): this method is damaged -- the SQL statements have
        # lost the calls that wrapped them, the `try:` matching the `except`
        # below is gone, the log call is unterminated, and the
        # `if os.path.exists(npath):` test has no body.  Restore from
        # upstream.
        oldpath = self.dir() + ".db"
        if os.path.exists(oldpath):
            self.db.execute('attach "../collection.media.db" as old')
    insert into media
     select m.fname, csum, mod, ifnull((select 1 from log l2 where l2.fname=m.fname), 0) as dirty
     from old.media m
     left outer join old.log l using (fname)
     select fname, null, 0, 1 from old.log where type=1;""")
                self.db.execute("delete from meta")
    insert into meta select dirMod, usn from old.meta
            except Exception as e:
                # if we couldn't import the old db for some reason, just start
                # anew
                self.col.log("failed to import old media db:" +
            self.db.execute("detach old")
            npath = "../collection.media.db.old"
            if os.path.exists(npath):
            os.rename("../collection.media.db", npath)

    def close(self) -> None:
        """Drop the reference to the media DB."""
        # NOTE(review): statements are missing -- the server test and the
        # `if self._oldcwd:` test have no bodies (the latter presumably
        # restored the saved working directory, per the comments).  Restore
        # from upstream.
        if self.col.server:
        self.db = None
        # change cwd back to old location
        if self._oldcwd:
                # may have been deleted

    def _deleteDB(self) -> None:
        """Read the path of the current media DB."""
        # NOTE(review): as written this only fetches the DB path and discards
        # it; the statements that acted on it appear to be missing -- restore
        # from upstream.
        path = self.db._path

    def dir(self) -> Any:
        return self._dir

    def _isFAT32(self) -> bool:
        """Return True when the media folder's volume reports a name starting with "fat".

        Always False off Windows.  The value tested is element 4 of
        ``win32api.GetVolumeInformation()`` (presumably the file-system
        name -- confirm against the pywin32 docs).
        """
        # NOTE(review): the indented volume lookup below has lost its
        # enclosing statement (likely a try/except, judging by the comment
        # about network drives), so the method does not parse as written.
        if not isWin:
            return False
        # pylint: disable=import-error
        import win32api, win32file  # pytype: disable=import-error

            name = win32file.GetVolumeNameForVolumeMountPoint(self._dir[:3])
            # mapped & unmapped network drive; pray that it's not vfat
            return False
        if win32api.GetVolumeInformation(name)[4].lower().startswith("fat"):
            return True
        return False

    # Adding media
    # opath must be in unicode

    def addFile(self, opath: str) -> Any:
        """Store the file at ``opath`` in the media folder via ``writeData``."""
        with open(opath, "rb") as source:
            payload = source.read()
            stored_name = self.writeData(opath, payload)
        return stored_name

    def writeData(self,
                  opath: str,
                  data: bytes,
                  typeHint: Optional[str] = None) -> Any:
        """Store ``data`` in the media folder and return the filename used.

        The name is the basename of ``opath``; when it has no extension and
        ``typeHint`` is a known MIME type, the matching extension is
        appended.  The name is NFC-normalized and passed through
        ``cleanFilename``.  An existing file with identical contents is
        reused; otherwise " (1)" is appended to the stem, and the number is
        incremented until a free or identical name is found.
        """
        # if fname is a full path, use only the basename
        fname = os.path.basename(opath)

        # if it's missing an extension and a type hint was provided, use that
        if not os.path.splitext(fname)[1] and typeHint:
            # mimetypes is returning '.jpe' even after calling .init(), so we'll do
            # it manually instead
            typeMap = {
                "image/jpeg": ".jpg",
                "image/png": ".png",
            }
            if typeHint in typeMap:
                fname += typeMap[typeHint]

        # make sure we write it in NFC form (pre-APFS Macs will autoconvert to NFD),
        # and return an NFC-encoded reference
        fname = unicodedata.normalize("NFC", fname)
        # ensure it's a valid filename
        base = self.cleanFilename(fname)
        (root, ext) = os.path.splitext(base)

        def repl(match):
            n = int(match.group(1))
            return " (%d)" % (n + 1)

        # find the first available name
        csum = checksum(data)
        reg = r" \((\d+)\)$"
        while True:
            fname = root + ext
            path = os.path.join(self.dir(), fname)
            # if it doesn't exist, copy it directly
            if not os.path.exists(path):
                with open(path, "wb") as f:
                    f.write(data)
                return fname
            # if it's identical, reuse
            with open(path, "rb") as f:
                if checksum(f.read()) == csum:
                    return fname
            # otherwise, increment the index in the filename.  The two
            # branches must be exclusive: without the else a suffixed name
            # was never incremented again and the loop could not terminate.
            if not re.search(reg, root):
                root = root + " (1)"
            else:
                root = re.sub(reg, repl, root)

    # String manipulation

    def filesInStr(self,
                   mid: Union[int, str],
                   string: str,
                   includeRemote: bool = False) -> List[str]:
        """Return the media filenames referenced in ``string``.

        ``mid`` selects the note type; for cloze note types containing
        "{{c" every cloze expansion of the text is scanned.  References
        starting with http://, https:// or ftp:// are only included when
        ``includeRemote`` is true.
        """
        l = []
        model = self.col.models.get(mid)
        strings: List[str]
        if model["type"] == MODEL_CLOZE and "{{c" in string:
            # if the field has clozes in it, we'll need to expand the
            # possibilities so we can render latex
            strings = self._expandClozes(string)
        else:
            strings = [string]
        for string in strings:
            # handle latex
            string = mungeQA(string, None, None, model, None, self.col)
            # extract filenames
            for reg in self.regexps:
                for match in re.finditer(reg, string):
                    fname = match.group("fname")
                    isLocal = not re.match("(https?|ftp)://", fname.lower())
                    if isLocal or includeRemote:
                        l.append(fname)
        return l

    def _expandClozes(self, string: str) -> List[str]:
        """Return the renderings of ``string`` needed to cover all its clozes.

        For every cloze number one rendering is produced with that cloze
        replaced by "[hint]" (or "[...]" without a hint) and the remaining
        clozes by their content; a final rendering has every cloze replaced
        by its content.
        """
        ords = set(re.findall(r"{{c(\d+)::.+?}}", string))
        strings = []
        # the import list was truncated; these are the three names the body
        # uses that are not defined anywhere else in it
        from anki.template.template import (
            clozeReg,
            CLOZE_REGEX_MATCH_GROUP_HINT,
            CLOZE_REGEX_MATCH_GROUP_CONTENT,
        )

        def qrepl(m):
            if m.group(CLOZE_REGEX_MATCH_GROUP_HINT):
                return "[%s]" % m.group(CLOZE_REGEX_MATCH_GROUP_HINT)
            else:
                return "[...]"

        def arepl(m):
            return m.group(CLOZE_REGEX_MATCH_GROUP_CONTENT)

        for n in ords:
            s = re.sub(clozeReg % n, qrepl, string)
            s = re.sub(clozeReg % ".+?", arepl, s)
            # keep the per-cloze rendering; it was computed but discarded
            strings.append(s)
        strings.append(re.sub(clozeReg % ".+?", arepl, string))
        return strings

    def transformNames(self, txt: str, func: Callable) -> Any:
        """Run ``re.sub`` with ``func`` over ``txt`` for each pattern in ``regexps``."""
        result = txt
        for pattern in self.regexps:
            result = re.sub(pattern, func, result)
        return result

    def strip(self, txt: str) -> str:
        """Return ``txt`` with everything matched by ``regexps`` removed."""
        remaining = txt
        for pattern in self.regexps:
            remaining = re.sub(pattern, "", remaining)
        return remaining

    def escapeImages(self, string: str, unescape: bool = False) -> str:
        """URL-quote the filenames of image tags in ``string``.

        With ``unescape`` true the filenames are unquoted instead.
        References starting with http://, https:// or ftp:// are left
        untouched.
        """
        fn: Callable
        # the two assignments must be alternatives; without the else the
        # function always quoted when unescaping and failed otherwise
        if unescape:
            fn = urllib.parse.unquote
        else:
            fn = urllib.parse.quote

        def repl(match):
            tag = match.group(0)
            fname = match.group("fname")
            if re.match("(https?|ftp)://", fname):
                return tag
            return tag.replace(fname, fn(fname))

        for reg in self.imgRegexps:
            string = re.sub(reg, repl, string)
        return string

    # Rebuilding DB

    def check(
        local: Optional[List[str]] = None
    ) -> Tuple[List[str], List[str], List[str]]:
        """Return (missingFiles, unusedFiles, warnings).

        ``local`` optionally supplies the list of filenames to examine
        instead of listing the media folder.
        """
        # NOTE(review): this method is damaged -- the signature has lost its
        # `self` parameter although the body uses it, several `if` tests have
        # no body, `else:`/`try:` lines are gone, calls are unterminated, and
        # nothing ever adds to allRefs, unused or warnings.  Restore from
        # upstream before use.
        mdir = self.dir()
        # gather all media references in NFC form
        allRefs = set()
        for nid, mid, flds in self.col.db.execute(
                "select id, mid, flds from notes"):
            noteRefs = self.filesInStr(mid, flds)
            # check the refs are in NFC
            for f in noteRefs:
                # if they're not, we'll need to fix them first
                if f != unicodedata.normalize("NFC", f):
                    noteRefs = self.filesInStr(mid, flds)
        # loop through media folder
        unused = []
        if local is None:
            files = os.listdir(mdir)
            files = local
        renamedFiles = False
        dirFound = False
        warnings = []
        for file in files:
            if not local:
                if not os.path.isfile(file):
                    # ignore directories
                    dirFound = True
            if file.startswith("_"):
                # leading _ says to ignore file

            if self.hasIllegal(file):
                name = file.encode(sys.getfilesystemencoding(),
                name = str(name, sys.getfilesystemencoding())
                    _("Invalid file name, please rename: %s") % name)

            nfcFile = unicodedata.normalize("NFC", file)
            # we enforce NFC fs encoding on non-macs
            if not isMac and not local:
                if file != nfcFile:
                    # delete if we already have the NFC form, otherwise rename
                    if os.path.exists(nfcFile):
                        renamedFiles = True
                        os.rename(file, nfcFile)
                        renamedFiles = True
                    file = nfcFile
            # compare
            if nfcFile not in allRefs:
        # if we renamed any files to nfc format, we must rerun the check
        # to make sure the renamed files are not marked as unused
        if renamedFiles:
            return self.check(local=local)
        nohave = [x for x in allRefs if not x.startswith("_")]
        # make sure the media DB is valid
        except DBError:

        if dirFound:
                _("Anki does not support files in subfolders of the collection.media folder."
        return (nohave, unused, warnings)

    def _normalizeNoteRefs(self, nid) -> None:
        note = self.col.getNote(nid)
        for c, fld in enumerate(note.fields):
            nfc = unicodedata.normalize("NFC", fld)
            if nfc != fld:
                note.fields[c] = nfc

    # Copying on import

    def have(self, fname: str) -> bool:
        """Return True if ``fname`` exists inside the media folder."""
        candidate = os.path.join(self.dir(), fname)
        return os.path.exists(candidate)

    # Illegal characters and paths

    # characters that stripIllegal() removes and hasIllegal() reports
    _illegalCharReg = re.compile(r'[][><:"/?*^\\|\0\r\n]')

    def stripIllegal(self, str: str) -> str:
        """Return ``str`` with every character matched by ``_illegalCharReg`` removed."""
        return self._illegalCharReg.sub("", str)

    def hasIllegal(self, s: str) -> bool:
        """Return True if ``s`` contains a character matched by ``_illegalCharReg``."""
        # NOTE(review): the `except UnicodeEncodeError` clause below has lost
        # its `try:` block (presumably an encoding check on `s`), so the
        # method does not parse as written -- restore from upstream.
        if re.search(self._illegalCharReg, s):
            return True
        except UnicodeEncodeError:
            return True
        return False

    def cleanFilename(self, fname: str) -> str:
        """Return ``fname`` made safe for storage.

        Applies ``stripIllegal``, ``_cleanWin32Filename`` and
        ``_cleanLongFilename`` in that order; an empty result becomes
        "renamed".
        """
        cleaned = fname
        for step in (self.stripIllegal,
                     self._cleanWin32Filename,
                     self._cleanLongFilename):
            cleaned = step(cleaned)
        return cleaned or "renamed"

    def _cleanWin32Filename(self, fname: str) -> str:
        """Prefix ``fname`` with "renamed" when Windows treats it as reserved.

        Returns ``fname`` unchanged when not running on Windows.
        """
        if isWin:
            # deal with things like con/prn/etc
            if pathlib.WindowsPath(fname).is_reserved():
                fname = "renamed" + fname
                assert not pathlib.WindowsPath(fname).is_reserved()
        return fname

    def _cleanLongFilename(self, fname: str) -> Any:
        """Return ``fname`` truncated to a safe length, keeping its extension.

        The limit is 136 characters, lowered further when the absolute
        directory of ``fname`` leaves less room below the platform's path
        limit (240 on Windows, 1024 elsewhere).
        """
        # a fairly safe limit that should work on typical windows
        # paths and on eCryptfs partitions, even with a duplicate
        # suffix appended
        namemax = 136

        # the limits are alternatives; without the else pathmax was
        # unbound off Windows and always 1024 on it
        if isWin:
            pathmax = 240
        else:
            pathmax = 1024

        # cap namemax based on absolute path
        dirlen = len(os.path.dirname(os.path.abspath(fname)))
        remaining = pathmax - dirlen
        namemax = min(remaining, namemax)
        assert namemax > 0

        if len(fname) > namemax:
            head, ext = os.path.splitext(fname)
            headmax = namemax - len(ext)
            head = head[0:headmax]
            fname = head + ext
            assert len(fname) <= namemax

        return fname

    # Tracking changes

    def findChanges(self) -> None:
        """Scan the media folder if it's changed, and note any changes.

        Nothing is done when ``_changed()`` returns a false value.
        """
        # the branch had no body, which left the method unparseable; the
        # recording step lives in _logChanges()
        if self._changed():
            self._logChanges()

    def haveDirty(self) -> Any:
        """Return the DB's answer to whether any media row has dirty=1."""
        query = "select 1 from media where dirty=1 limit 1"
        return self.db.scalar(query)

    def _mtime(self, path: str) -> int:
        return int(os.stat(path).st_mtime)

    def _checksum(self, path: str) -> str:
        """Return ``checksum()`` of the contents of the file at ``path``."""
        with open(path, "rb") as handle:
            contents = handle.read()
            return checksum(contents)

    def _changed(self) -> int:
        "Return dir mtime if it has changed since the last findChanges()"
        # doesn't track edits, but user can add or remove a file to update
        mod = self.db.scalar("select dirMod from meta")
        mtime = self._mtime(self.dir())
        if not self._isFAT32() and mod and mod == mtime:
            return False
        return mtime

    def _logChanges(self) -> None:
        (added, removed) = self._changes()
        media = []
        for f, mtime in added:
            media.append((f, self._checksum(f), mtime, 1))
        for f in removed:
            media.append((f, None, 0, 1))
        # update media db
        self.db.executemany("insert or replace into media values (?,?,?,?)",
        self.db.execute("update meta set dirMod = ?", self._mtime(self.dir()))

    def _changes(self) -> Tuple[List[Tuple[str, int]], List[str]]:
        """Compare the media folder with the media DB and return (added, removed).

        ``added`` holds (NFC name, mtime) pairs.  ``self.cache`` is rebuilt
        as NFC name -> [csum, mtime, seen-on-disk flag] from the media rows
        whose csum is not null.
        """
        # NOTE(review): this method is damaged -- most `if` tests below have
        # lost their bodies (presumably `continue` statements), `else:` lines
        # are gone (a new file is appended and then looked up in the cache),
        # and nothing is ever added to `removed`.  Restore from upstream
        # before use.
        self.cache: Dict[str, Any] = {}
        for (name, csum, mod) in self.db.execute(
                "select fname, csum, mtime from media where csum is not null"):
            # previous entries may not have been in NFC form
            normname = unicodedata.normalize("NFC", name)
            self.cache[normname] = [csum, mod, False]
        added = []
        removed = []
        # loop through on-disk files
        with os.scandir(self.dir()) as it:
            for f in it:
                # ignore folders and thumbs.db
                if f.is_dir():
                if f.name.lower() == "thumbs.db":
                # and files with invalid chars
                if self.hasIllegal(f.name):
                # empty files are invalid; clean them up and continue
                sz = f.stat().st_size
                if not sz:
                if sz > 100 * 1024 * 1024:
                    self.col.log("ignoring file over 100MB", f.name)
                # check encoding
                normname = unicodedata.normalize("NFC", f.name)
                if not isMac:
                    if f.name != normname:
                        # wrong filename encoding which will cause sync errors
                        if os.path.exists(normname):
                            os.rename(f.name, normname)
                    # on Macs we can access the file using any normalization

                # newly added?
                mtime = int(f.stat().st_mtime)
                if normname not in self.cache:
                    added.append((normname, mtime))
                    # modified since last time?
                    if mtime != self.cache[normname][1]:
                        # and has different checksum?
                        if self._checksum(normname) != self.cache[normname][0]:
                            added.append((normname, mtime))
                    # mark as used
                    self.cache[normname][2] = True
        # look for any entries in the cache that no longer exist on disk
        for (k, v) in list(self.cache.items()):
            if not v[2]:
        return added, removed

    # Syncing-related

    def lastUsn(self) -> Any:
        """Return the ``lastUsn`` value stored in the meta table."""
        query = "select lastUsn from meta"
        return self.db.scalar(query)

    def setLastUsn(self, usn) -> None:
        """Store ``usn`` as ``lastUsn`` in the meta table."""
        statement = "update meta set lastUsn = ?"
        self.db.execute(statement, usn)

    def syncInfo(self, fname) -> Any:
        """Return the (csum, dirty) row for ``fname``, or (None, 0) without one."""
        # the query call was left unterminated; fname is the value for its
        # single placeholder
        ret = self.db.first("select csum, dirty from media where fname=?",
                            fname)
        return ret or (None, 0)

    def markClean(self, fnames) -> None:
        """Reset the dirty flag of every media row named in ``fnames``."""
        statement = "update media set dirty=0 where fname=?"
        for name in fnames:
            self.db.execute(statement, name)

    def syncDelete(self, fname) -> None:
        """Delete the file ``fname`` (if present) and its row in the media DB.

        ``fname`` is used as given, i.e. relative to the current working
        directory unless it is absolute.
        """
        # the existence test had no body; removing the file is the only
        # action consistent with the row deletion below
        if os.path.exists(fname):
            os.unlink(fname)
        self.db.execute("delete from media where fname=?", fname)

    def mediaCount(self) -> Any:
        """Return the number of media rows whose csum is not null."""
        query = "select count() from media where csum is not null"
        return self.db.scalar(query)

    def dirtyCount(self) -> Any:
        """Return the number of media rows flagged dirty."""
        query = "select count() from media where dirty=1"
        return self.db.scalar(query)

    def forceResync(self) -> None:
        """Empty the media table and zero ``lastUsn`` and ``dirMod`` in meta."""
        for statement in ("delete from media",
                          "update meta set lastUsn=0,dirMod=0"):
            self.db.execute(statement)

    # Media syncing: zips

    def mediaChangesZip(self) -> Tuple[bytes, list]:
        """Bundle pending (dirty) media changes into a zip archive.

        At most ``SYNC_ZIP_COUNT`` rows are read, and packing stops once the
        added files total ``SYNC_ZIP_SIZE`` bytes or more.  Returns
        ``(zip bytes, fnames)`` where ``fnames`` lists the DB names handled.
        The archive holds each added file under its row index plus a
        ``_meta`` JSON entry.  Files are read by their bare name, i.e.
        relative to the current working directory.
        """
        f = io.BytesIO()
        fnames = []
        # meta is list of (fname, zipname), where an empty zipname
        # is a deleted file
        meta = []
        sz = 0

        # the context manager finalizes the archive; without a close the
        # returned bytes have no central directory
        with zipfile.ZipFile(f, "w", compression=zipfile.ZIP_DEFLATED) as z:
            for c, (fname, csum) in enumerate(
                    self.db.execute("select fname, csum from media where dirty=1"
                                    " limit %d" % SYNC_ZIP_COUNT)):

                # NOTE(review): fnames was returned but never filled in;
                # recording the DB name is assumed -- confirm with callers.
                fnames.append(fname)
                normname = unicodedata.normalize("NFC", fname)

                if csum:
                    self.col.log("+media zip", fname)
                    z.write(fname, str(c))
                    meta.append((normname, str(c)))
                    sz += os.path.getsize(fname)
                else:
                    self.col.log("-media zip", fname)
                    meta.append((normname, ""))

                if sz >= SYNC_ZIP_SIZE:
                    break

            z.writestr("_meta", json.dumps(meta))
        return f.getvalue(), fnames

    def addFilesFromZip(self, zipData) -> int:
        """Extract the files in ``zipData`` and record them in the media DB.

        Each entry other than ``_meta`` is written under the NFC form of
        the name that ``_meta`` maps its zip name to (relative to the
        current working directory) and stored as a clean row.  Returns the
        number of files written.
        """
        f = io.BytesIO(zipData)
        media = []
        cnt = 0
        with zipfile.ZipFile(f, "r") as z:
            # get meta info first
            meta = json.loads(z.read("_meta").decode("utf8"))
            # then loop through all files
            for i in z.infolist():
                if i.filename == "_meta":
                    # ignore previously-retrieved meta
                    continue
                data = z.read(i)
                csum = checksum(data)
                name = meta[i.filename]
                # normalize name
                name = unicodedata.normalize("NFC", name)
                # save file
                # NOTE(review): name comes from the archive and is used as
                # a path unchecked -- confirm the archive is trusted.
                with open(name, "wb") as out:  # type: ignore
                    out.write(data)
                # update db
                media.append((name, csum, self._mtime(name), 0))
                cnt += 1
        if media:
            self.db.executemany(
                "insert or replace into media values (?,?,?,?)", media)
        return cnt
Пример #13
class ChangeLog:
    """Tracks changes made to notes.

    Each change is one row of the ``changelog`` table in
    ``../user_files/changelog.db`` (relative to this module's directory).
    Row ids are assigned by this class from ``next_id``, which continues
    after the largest id already stored.
    """

    def __init__(self):
        """Open the changelog database, creating the schema for a new file."""
        base_path = os.path.dirname(os.path.abspath(__file__))
        db_path = os.path.join(base_path, "..", "user_files", "changelog.db")
        # must be checked before DB() is called on the path
        need_create = not os.path.exists(db_path)
        self.db = DB(db_path)
        if need_create:
            self._create_tables()
            self._create_indices()
        max_id = self.db.scalar("select max(id) from changelog")
        # max(id) is None for an empty table, so numbering starts at 0
        self.next_id = 0 if max_id is None else max_id + 1

    def close(self):
        """Close the underlying database."""
        self.db.close()

    def commit_changes(self):
        """Commit pending rows and clear the DB's modified flag."""
        self.db.commit()
        self.db.mod = False

    def record_change(self, op, init_ts, change):
        """Insert one change row without committing.

        Args:
            op: text identifying the operation performed.
            init_ts: timestamp (ms) when the bulk change was initiated.
            change: object providing ``ts``, ``nid``, ``fld``, ``old`` and
                ``new`` attributes.
        """
        self.db.execute("""
            insert into changelog (id, op, init_ts, ts, nid, fld, old, new)
            values (?,?,?,?,?,?,?,?)
            """, self.next_id, op, init_ts, change.ts, change.nid, change.fld,
            change.old, change.new)
        self.next_id += 1

    def record_and_commit_changes(self, op, init_ts, changes):
        """Insert a batch of change rows and commit them.

        Args:
            op: text identifying the operation performed.
            init_ts: timestamp (ms) when the bulk change was initiated.
            changes: iterable of objects providing ``ts``, ``nid``, ``fld``,
                ``old`` and ``new`` attributes.
        """
        data = [
            (self.next_id + offset, op, init_ts, change.ts, change.nid,
             change.fld, change.old, change.new)
            for offset, change in enumerate(changes)
        ]
        self.next_id += len(data)
        self.db.executemany("""
            insert into changelog (id, op, init_ts, ts, nid, fld, old, new)
            values (?,?,?,?,?,?,?,?)
        """, data)
        self.commit_changes()

    def _create_tables(self):
        """Create the changelog table if it does not exist yet."""
        self.db.execute("""
            create table if not exists changelog (
              id      integer primary key,
              -- identifies the operation performed
              op      text not null,
              -- timestamp (ms) when bulk changes were initiated
              init_ts integer not null,
              -- timestamp (ms) when field was changed
              ts      integer not null,
              -- note id
              nid     integer not null,
              -- field name
              fld     text not null,
              -- old value of field
              old     text not null,
              -- new value of field
              new     text not null
            )
        """)

    def _create_indices(self):
        """Create the index on the change timestamp if it does not exist yet."""
        self.db.execute("""
            create index if not exists ix_changelog_ts on changelog (ts);
        """)
Пример #14
class Config(object):
    configDbName = "ankiprefs.db"

    def __init__(self, confDir):
        """Remember the configuration directory and start with empty settings.

        On macOS the default ``~/.anki`` location is replaced by
        ``~/Library/Application Support/Anki``.
        """
        self._conf = {}
        self.confDir = confDir
        if isMac:
            legacy_dir = os.path.expanduser("~/.anki")
            if confDir == legacy_dir:
                self.confDir = os.path.expanduser(
                    "~/Library/Application Support/Anki")

    # dict interface
    def get(self, *args):
        return self._conf.get(*args)

    def __getitem__(self, key):
        return self._conf[key]

    def __setitem__(self, key, val):
        self._conf[key] = val

    def __contains__(self, key):
        return self._conf.__contains__(key)

    # load/save
    def load(self):
        """Open the preferences database and read the stored configuration.

        NOTE(review): this method is incomplete as it stands. The call that
        should wrap the two ``create table`` statements, the code that applies
        a stored ``conf`` value, and what looks like an ``else`` branch are
        missing. Restore them from the upstream source before relying on it.
        """
        path = self._dbPath()
        # text=str is passed straight to DB; presumably it selects the text
        # factory -- TODO confirm against the DB class
        self.db = DB(path, text=str)
        # NOTE(review): the two lines below are bare SQL, not Python; the
        # statement that executed them as a script appears to be missing.
create table if not exists decks (path text primary key);
create table if not exists config (conf text not null);
        conf = self.db.scalar("select conf from config")
        if conf:
            # NOTE(review): handling of a non-empty stored config is missing
            # here; the insert below presumably belongs to the empty case.
            # ensure there's something to update
            self.db.execute("insert or ignore into config values ('')")

    def save(self):
        """Write the configuration into the ``conf`` column of the config table.

        NOTE(review): the statement below is truncated. The value bound to
        ``?`` and the closing parenthesis are missing, and the serialization
        used for ``self._conf`` cannot be determined from this file. Restore
        it from the upstream source.
        """
        self.db.execute("update config set conf = ?",

    # recent deck support
    def recentDecks(self):
        """Return a list of paths to remembered decks.

        Paths are read from the ``decks`` table and decoded from UTF-8.
        """
        # have to convert to unicode manually because of the text factory
        # NOTE(review): unicode() is a Python 2 builtin; this method needs
        # porting before it can run on Python 3.
        return [
            unicode(d[0], 'utf8')
            for d in self.db.execute("select path from decks")
        ]

    def addRecentDeck(self, path):
        """Add PATH to the list of recent decks if not already. Must be unicode.

        The path is stored UTF-8 encoded, which is the form recentDecks()
        decodes when reading it back.
        """
        # NOTE(review): the bound parameter was missing from this call; the
        # UTF-8 encoding mirrors the decode in recentDecks() -- confirm.
        self.db.execute("insert or ignore into decks values (?)",
                        path.encode("utf-8"))

    def delRecentDeck(self, path):
        """Remove PATH from the list if it exists. Must be unicode.

        The path is UTF-8 encoded before matching, which is the form
        recentDecks() decodes when reading paths back.
        """
        # NOTE(review): the bound parameter was missing from this call; the
        # UTF-8 encoding mirrors the decode in recentDecks() -- confirm.
        self.db.execute("delete from decks where path = ?",
                        path.encode("utf-8"))

    # helpers
    def _addDefaults(self):
        """Copy settings from defaultConf that the loaded config lacks.

        Nothing is done when the stored 'confVer' is at least the version in
        defaultConf. Existing settings are never overwritten.
        """
        current = self.get('confVer')
        # A config with no 'confVer' counts as outdated. The explicit None
        # check avoids comparing None with an int, which raises on Python 3.
        if current is not None and current >= defaultConf['confVer']:
            return
        for (k, v) in defaultConf.items():
            if k not in self:
                self[k] = v

    def _dbPath(self):
        return os.path.join(self.confDir, self.configDbName)

    def _addAnkiDirs(self):
        base = self.confDir
        for x in (base, os.path.join(base,
                                     "addons"), os.path.join(base, "backups")):