Example #1
        # Create 'blob_index' and 'toplevel*_index' and write out
        # '.blob' file.
        LEN_PREFIX = self.db.LEN_PREFIX
        is_hits_from_lpath_lang = lang in self.db.import_everything_langs
        blob_index = {}  # {blobname -> dbfile}
        toplevelname_index = {}  # {ilk -> toplevelname -> blobnames}
        toplevelprefix_index = {}  # {ilk -> prefix -> toplevelnames}
        for blob in tree.findall("file/scope"):
            assert lang == blob.get("lang"), \
                "Adding %s resource %s to %s blob" % (
                    lang, res, blob.get("lang"))
            blobname = blob.get("name")
            dbfile = self.db.bhash_from_blob_info(cix_path, lang, blobname)
            blob_index[blobname] = dbfile
            ET.ElementTree(blob).write(join(dbdir, dbfile + ".blob"))
            for toplevelname, elem in blob.names.items():
                if "__local__" in elem.get("attributes", "").split():
                    # this is internal to the stdlib
                    continue
                ilk = elem.get("ilk") or elem.tag
                bft = toplevelname_index.setdefault(ilk, {})
                if toplevelname not in bft:
                    bft[toplevelname] = set([blobname])
                else:
                    bft[toplevelname].add(blobname)
                prefix = toplevelname[:LEN_PREFIX]
                tfp = toplevelprefix_index.setdefault(ilk, {})
                if prefix not in tfp:
                    tfp[prefix] = set([toplevelname])
                else:
                    tfp[prefix].add(toplevelname)
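
A minimal sketch of the shapes these three indices end up with (values
illustrative, not from a real scan; LEN_PREFIX assumed to be 3 here):

    blob_index = {"os": "<bhash>"}                              # {blobname -> dbfile}
    toplevelname_index = {"function": {"getcwd": set(["os"])}}  # {ilk -> toplevelname -> blobnames}
    toplevelprefix_index = {"function": {"get": set(["getcwd"])}}  # {ilk -> prefix -> toplevelnames}
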
Example #2
    def _add_res(self, res):
        cix_path = res.path
        try:
            tree = tree_from_cix_path(cix_path)
        except ET.XMLParserError as ex:
            log.warn("could not load `%s' into catalog (skipping): %s",
                     cix_path, ex)
            return

        LEN_PREFIX = self.db.LEN_PREFIX
        res_id = self._new_res_id()
        res_data = {}  # {lang -> blobname -> ilk -> toplevelnames}
        name = tree.get("name") or splitext(basename(cix_path))[0]
        for blob in tree.findall("file/scope"):
            lang, blobname = blob.get("lang"), blob.get("name")
            if not lang:
                raise DatabaseError("add `%s': no 'lang' attr on %r" %
                                    (res, blob))

            # Create 'res_data'.
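            # 'tfifb' reads right-to-left: "toplevelnames from ilk from
            # blobname", i.e. res_data[lang] is {blobname -> ilk -> toplevelnames}.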
            tfifb = res_data.setdefault(lang, {})
            toplevelnames_from_ilk = tfifb.setdefault(blobname, {})
            if lang in self.db.import_everything_langs:
                for toplevelname, elem in blob.names.items():
                    ilk = elem.get("ilk") or elem.tag
                    if ilk not in toplevelnames_from_ilk:
                        toplevelnames_from_ilk[ilk] = set([toplevelname])
                    else:
                        toplevelnames_from_ilk[ilk].add(toplevelname)

            # Update 'toplevel*_index'.
            # toplevelname_index:   {lang -> ilk -> toplevelname -> res_id -> blobnames}
            # toplevelprefix_index: {lang -> ilk -> prefix -> res_id -> toplevelnames}
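            # These abbreviated names also read right-to-left: 'bfrftfi' is
            # "blobnames from res_id from toplevelname from ilk" (the
            # per-lang slice of toplevelname_index); 'tfrfpfi' is the
            # equivalent slice of toplevelprefix_index.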
            bfrftfi = self.toplevelname_index.setdefault(lang, {})
            tfrfpfi = self.toplevelprefix_index.setdefault(lang, {})
            for ilk, toplevelnames in toplevelnames_from_ilk.items():
                bfrft = bfrftfi.setdefault(ilk, {})
                tfrfp = tfrfpfi.setdefault(ilk, {})
                for toplevelname in toplevelnames:
                    bfr = bfrft.setdefault(toplevelname, {})
                    if res_id not in bfr:
                        bfr[res_id] = set([blobname])
                    else:
                        bfr[res_id].add(blobname)
                    prefix = toplevelname[:LEN_PREFIX]
                    tfr = tfrfp.setdefault(prefix, {})
                    if res_id not in tfr:
                        tfr[res_id] = set([toplevelname])
                    else:
                        tfr[res_id].add(toplevelname)

            # Update 'blob_index'.
            dbfile_and_res_id_from_blobname \
                = self.blob_index.setdefault(lang, {})
            assert blobname not in dbfile_and_res_id_from_blobname, \
                ("codeintel: %s %r blob in `%s' collides "
                    "with existing %s %r blob (from res_id %r) in catalog: "
                    "(XXX haven't decided how to deal with that yet)"
                    % (lang, blobname, cix_path, lang, blobname,
                       dbfile_and_res_id_from_blobname[blobname][1]))
            dbfile = self.db.bhash_from_blob_info(cix_path, lang, blobname)
            dbfile_and_res_id_from_blobname[blobname] = (dbfile, res_id)

            # Write out '.blob' file.
            dbdir = join(self.base_dir, safe_lang_from_lang(lang))
            if not exists(dbdir):
                log.debug("fs-write: mkdir '%s'", dbdir)
                os.makedirs(dbdir)
            log.debug("fs-write: catalog %s blob '%s'", lang, dbfile)
            ET.ElementTree(blob).write(join(dbdir, dbfile + ".blob"))

        # Update 'res_index'.
        last_updated = os.stat(cix_path).st_mtime
        self.res_index[res.area_path] \
            = (res_id, last_updated, name, res_data)
Example #3
    def update_buf_data(self,
                        buf,
                        scan_tree,
                        scan_time,
                        scan_error,
                        skip_scan_time_check=False):
        """Update this MultiLangZone with the buffer data.

        @param buf {CitadelBuffer} the buffer whose data is being added
            to the database.
        @param scan_tree {ciElementTree} the CIX scan data. Might be None
            if there was an early scanning failure.
        @param scan_time {timestamp} the time of the scan, typically the
            mtime of the file
        @param scan_error {str} an error string if scanning failed, or
            None if it was successful.
        @param skip_scan_time_check {boolean} (default False) whether the
            buffer data should be updated even if `scan_time` is <= that
            in the database.
        """
        self._acquire_lock()
        try:
            # TODO: Canonicalize path (or assert that it is canonicalized)
            dir, base = split(buf.path)

            # Get the current data, if any.
            res_index = self.load_index(dir, "res_index", {})
            res_index_has_changed = False
            blob_index = self.load_index(dir, "blob_index", {})
            blob_index_has_changed = False
            is_hits_from_lpath_lang = self.lang in self.db.import_everything_langs
            if is_hits_from_lpath_lang:
                # TODO: Not sure {} for a default is correct here.
                toplevelname_index = self.load_index(dir, "toplevelname_index",
                                                     {})
                toplevelname_index_has_changed = False
            try:
                (old_scan_time, old_scan_error, old_res_data) = res_index[base]
            except KeyError:  # adding a new entry
                (old_scan_time, old_scan_error, old_res_data) = None, None, {}
            else:  # updating an existing entry
                if not skip_scan_time_check and scan_time is not None \
                   and scan_time <= old_scan_time:
                    log.debug(
                        "skipping db update for '%s': %s <= %s and "
                        "no 'skip_scan_time_check' option", base, scan_time,
                        old_scan_time)
                    return

            log.debug("update from %s buf '%s'", buf.lang, buf.path)

            # Parse the tree and get the list of blobnames.
            # res_data: {lang -> blobname -> ilk -> toplevelnames}
            new_res_data = {}
            new_blob_from_lang_and_blobname = {}
            if scan_tree:
                for blob in scan_tree[0]:
                    lang = blob.get("lang")
                    blobname = blob.get("name")
                    new_blob_from_lang_and_blobname[(lang, blobname)] = blob
                    tfifb = new_res_data.setdefault(lang, {})
                    toplevelnames_from_ilk = tfifb.setdefault(blobname, {})
                    for toplevelname, elem in blob.names.items():
                        ilk = elem.get("ilk") or elem.tag
                        if ilk not in toplevelnames_from_ilk:
                            toplevelnames_from_ilk[ilk] = set([toplevelname])
                        else:
                            toplevelnames_from_ilk[ilk].add(toplevelname)
                        # For PHP namespaces, we also want to add all namespace
                        # child items, as this will make it easy for tree_php
                        # to lookup a Fully Qualified Namespace (FQN).
                        if ilk == "namespace" and lang == "PHP":
                            for childname, childelem in elem.names.items():
                                child_ilk = (childelem.get("ilk")
                                             or childelem.tag)
                                child_fqn = "%s\\%s" % (toplevelname,
                                                        childname)
                                if child_ilk not in toplevelnames_from_ilk:
                                    toplevelnames_from_ilk[child_ilk] = set(
                                        [child_fqn])
                                else:
                                    toplevelnames_from_ilk[child_ilk].add(
                                        child_fqn)

            # Determine necessary changes to res_index.
            if scan_error:
                if (scan_time != old_scan_time
                        or scan_error != old_scan_error):
                    res_index[base] = (scan_time, scan_error, old_res_data)
                    res_index_has_changed = True

            else:
                # Only consider new blobs if there wasn't a scan error.
                # I.e., we want to preserve the last good scan info.

                if (scan_time != old_scan_time or scan_error != old_scan_error
                        or new_res_data != old_res_data):
                    res_index[base] = (scan_time, scan_error, new_res_data)
                    res_index_has_changed = True

                if is_hits_from_lpath_lang:
                    if new_res_data != old_res_data:
                        toplevelname_index.update(base, old_res_data,
                                                  new_res_data)
                        toplevelname_index_has_changed = True

                # Determine necessary changes to dbfile_from_blobname index
                # and the dbfiles and then make them.
                dbfile_changes = []
                for (lang, blobname), blob \
                        in list(new_blob_from_lang_and_blobname.items()):
                    try:
                        old_res_data[lang][blobname]
                    except KeyError:
                        dbfile_changes.append(("add", lang, blobname, blob))
                    else:
                        dbfile_changes.append(("update", lang, blobname, blob))

                for lang, old_tfifb in list(old_res_data.items()):
                    for blobname in old_tfifb:
                        try:
                            new_res_data[lang][blobname]
                        except KeyError:
                            dbfile_changes.append(
                                ("remove", lang, blobname, None))

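                # 'dhash' names this dir's subdirectory under base_dir,
                # where its '.blob' files live on disk.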
                dhash = self.dhash_from_dir(dir)
                for action, lang, blobname, blob in dbfile_changes:
                    if action == "add":
                        dbfile = self.db.bhash_from_blob_info(
                            buf.path, lang, blobname)
                        blob_index.setdefault(lang, {})[blobname] = dbfile
                        blob_index_has_changed = True
                        dbdir = join(self.base_dir, dhash)
                        if not exists(dbdir):
                            self._mk_dbdir(dbdir, dir)
                        # XXX What to do on write failure?
                        log.debug("fs-write: %s|%s blob '%s/%s'", self.lang,
                                  lang, dhash, dbfile)
                        if blob.get("src") is None:
                            # for defns_from_pos() support
                            blob.set("src", buf.path)
                        ET.ElementTree(blob).write(
                            join(dbdir, dbfile + ".blob"))
                    elif action == "remove":
                        dbfile = blob_index[lang][blobname]
                        del blob_index[lang][blobname]
                        blob_index_has_changed = True
                        # XXX What to do on removal failure?
                        log.debug("fs-write: remove %s|%s blob '%s/%s'",
                                  self.lang, lang, dhash, dbfile)
                        try:
                            os.remove(
                                join(self.base_dir, dhash, dbfile + ".blob"))
                        except EnvironmentError as ex:
                            self.db.corruption(
                                "MultiLangZone.update_buf_data",
                                "could not remove dbfile for '%s' blob: %s" %
                                (blobname, ex), "ignore")
                    elif action == "update":
                        # Try to only change the dbfile on disk if it is
                        # different.
                        s = BytesIO()
                        if blob.get("src") is None:
                            # for defns_from_pos() support
                            blob.set("src", buf.path)
                        ET.ElementTree(blob).write(s)
                        new_dbfile_content = s.getvalue()
                        dbfile = blob_index[lang][blobname]
                        dbpath = join(self.base_dir, dhash, dbfile + ".blob")
                        # PERF: Might be nice to cache the new dbfile
                        #       content for the next time this resource is
                        #       updated. For files under edit this will be
                        #       common. I.e. just for the "editset".
                        try:
                            fin = open(dbpath, 'rb')
                        except (OSError, IOError) as ex:
                            # Technically if the dbfile doesn't exist, this
                            # is a sign of database corruption. No matter
                            # though (for this blob anyway), we are about to
                            # replace it.
                            old_dbfile_content = None
                        else:
                            try:
                                old_dbfile_content = fin.read()
                            finally:
                                fin.close()
                        if new_dbfile_content != old_dbfile_content:
                            if not exists(dirname(dbpath)):
                                self._mk_dbdir(dirname(dbpath), dir)
                            # XXX What to do if fail to write out file?
                            log.debug("fs-write: %s|%s blob '%s/%s'",
                                      self.lang, lang, dhash, dbfile)
                            fout = open(dbpath, 'wb')
                            try:
                                fout.write(new_dbfile_content)
                            finally:
                                fout.close()

            if res_index_has_changed:
                self.changed_index(dir, "res_index")
            if blob_index_has_changed:
                self.changed_index(dir, "blob_index")
            if is_hits_from_lpath_lang and toplevelname_index_has_changed:
                self.changed_index(dir, "toplevelname_index")
        finally:
            self._release_lock()
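
A minimal usage sketch, assuming `zone` is a MultiLangZone instance and
`buf` is a CitadelBuffer whose scan produced `scan_tree` and `scan_error`
(all names here are hypothetical):

    scan_time = os.stat(buf.path).st_mtime  # typically the file's mtime
    zone.update_buf_data(buf, scan_tree, scan_time, scan_error,
                         skip_scan_time_check=False)
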
Example #4
    def _add_res(self, res, lang, name, ver):
        log.debug("%s stdlibs: add %s", lang, res)
        cix_path = res.path
        try:
            tree = tree_from_cix_path(cix_path)
        except ET.XMLParserError as ex:
            log.warn("could not load %s stdlib from `%s' (%s): skipping", name,
                     cix_path, ex)
            return

        dbdir = join(self.base_dir, name)
        if exists(dbdir):
            log.warn(
                "`db/stdlibs/%s' already exists and should not: "
                "removing it", name)
            try:
                rmdir(dbdir)
            except OSError as ex:
                log.error(
                    "could not remove `%s' to create %s stdlib in "
                    "database (%s): skipping", dbdir, name, ex)
        if not exists(dbdir):
            os.makedirs(dbdir)

        # Create 'blob_index' and 'toplevel*_index' and write out
        # '.blob' file.
        LEN_PREFIX = self.db.LEN_PREFIX
        is_hits_from_lpath_lang = lang in self.db.import_everything_langs
        blob_index = {}  # {blobname -> dbfile}
        toplevelname_index = {}  # {ilk -> toplevelname -> blobnames}
        toplevelprefix_index = {}  # {ilk -> prefix -> toplevelnames}
        for blob in tree.findall("file/scope"):
            assert lang == blob.get("lang"), \
                "Adding %s resource %s to %s blob" % (
                    lang, res, blob.get("lang"))
            blobname = blob.get("name")
            dbfile = self.db.bhash_from_blob_info(cix_path, lang, blobname)
            blob_index[blobname] = dbfile
            ET.ElementTree(blob).write(join(dbdir, dbfile + ".blob"))
            for toplevelname, elem in blob.names.items():
                if "__local__" in elem.get("attributes", "").split():
                    # this is internal to the stdlib
                    continue
                ilk = elem.get("ilk") or elem.tag
                bft = toplevelname_index.setdefault(ilk, {})
                if toplevelname not in bft:
                    bft[toplevelname] = set([blobname])
                else:
                    bft[toplevelname].add(blobname)
                prefix = toplevelname[:LEN_PREFIX]
                tfp = toplevelprefix_index.setdefault(ilk, {})
                if prefix not in tfp:
                    tfp[prefix] = set([toplevelname])
                else:
                    tfp[prefix].add(toplevelname)

        self.db.save_pickle(join(dbdir, "blob_index"), blob_index)
        self.db.save_pickle(join(dbdir, "toplevelname_index"),
                            toplevelname_index)
        self.db.save_pickle(join(dbdir, "toplevelprefix_index"),
                            toplevelprefix_index)

        mtime = os.stat(cix_path).st_mtime
        self.res_index[res.area_path] = mtime
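
After a successful _add_res() the stdlib's directory under the database
looks roughly like this (bhash values illustrative):

    db/stdlibs/<name>/
        blob_index            # pickle: {blobname -> dbfile}
        toplevelname_index    # pickle: {ilk -> toplevelname -> blobnames}
        toplevelprefix_index  # pickle: {ilk -> prefix -> toplevelnames}
        <bhash>.blob          # one CIX blob per 'file/scope' element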