示例#1
0
 def from_index(index_props):
     """
     Create a SiacNote from a row that was read from the search index.

     For notes stored in the index, title, text and source are collapsed into a
     single field (position 4 of the row), separated by the unit separator
     character (U+001F). That field is split up again here.

     Args:
         index_props: indexable row; position 0 is used as the note id, position 2
             is passed through unchanged and position 4 holds the combined field.

     Returns:
         A SiacNote built from an 11-element tuple; the elements that cannot be
         taken from the row are filled with -1 / "" placeholders.

     Raises:
         IndexError: if the combined field has fewer than three parts.
     """
     note_id = index_props[0]
     # title, body and source (in that order) share one field in the index
     parts = index_props[4].split("\u001f")
     title, body, source = parts[0], parts[1], parts[2]
     return SiacNote((note_id, title, body, source, index_props[2], -1, "", "", "", "", -1))
示例#2
0
    def _most_common_words(self, text):
        """ Returns the html that is displayed in the right sidebar containing the clickable keywords. """

        if text is None or len(text) == 0:
            return "No keywords for empty result."

        text = utility.text.clean(text, self.stopwords)
        counts = {}
        for token in text.split():
            if token == "" or len(token) == 1 or self.EXCLUDE_KEYWORDS.match(
                    token):
                continue
            if token.lower() in counts:
                counts[token.lower()][1] += 1
            else:
                counts[token.lower()] = [token, 1]

        sortedCounts = sorted(counts.items(),
                              key=lambda kv: kv[1][1],
                              reverse=True)
        html = ""

        for entry in sortedCounts[:15]:
            k = utility.text.trim_if_longer_than(entry[1][0], 25)
            kd = entry[1][0].replace("'", "")
            html = f"{html}<a class='keyword' href='#' data-keyword='{kd}' onclick='event.preventDefault(); searchFor($(this).data(\"keyword\"));'>{k}</a>, "

        if len(html) == 0:
            return "No keywords for empty result."

        return html[:-2]
示例#3
0
 def _mostCommonWords(self, text):
     """
     Returns the html that is displayed in the right sidebar containing the clickable keywords.
     """
     if text is None or len(text) == 0:
         return "No keywords for empty result."
     text = utility.text.clean(text, self.stopwords)
     counts = {}
     for token in text.split():
         if token == "" or len(token) == 1 or self.SOUND_TAG.match(token):
             continue
         if token.lower() in counts:
             counts[token.lower()][1] += 1
         else:
             counts[token.lower()] = [token, 1]
     sortedCounts = sorted(counts.items(),
                           key=lambda kv: kv[1][1],
                           reverse=True)
     html = ""
     for entry in sortedCounts[:15]:
         html = "%s<a class='keyword' href='#' onclick='event.preventDefault(); searchFor($(this).text())'>%s</a>, " % (
             html, entry[1][0])
     if len(html) == 0:
         return "No keywords for empty result."
     return html[:-2]
示例#4
0
 def removeStopwords(self, text):
     """
     Return text without its stopwords.

     The text is split on single spaces; every token whose lowercased form is
     contained in self.stopWords is dropped, the remaining tokens are joined
     with single spaces again.
     """
     kept = [word for word in text.split(" ") if word.lower() not in self.stopWords]
     return " ".join(kept)
示例#5
0
    def get_result_html_simple(self, db_list, tag_hover = True, search_on_selection = True):
        """
        Build the html for a list of notes, using the "simple" note templates.

        Args:
            db_list: notes to render; each entry is read through .id, .mid, .tags,
                .note_type and get_content().
            tag_hover: forwarded to utility.tags.build_tag_string.
            search_on_selection: if True, the template's `mouseup` slot is filled
                with "getSelectionText()", otherwise it is left empty.

        Returns:
            The concatenated html of all rendered notes ("" for an empty list).
        """
        epochTime       = int(time.time() * 1000)
        nids            = [r.id for r in db_list]
        rendered        = []

        # retention scores are only looked up if the feature is switched on
        if self.showRetentionScores:
            retsByNid   = getRetentions(nids)

        for counter, res in enumerate(db_list):
            # NOTE(review): the note is indexed here (res[3]) while everything else
            # uses attribute access; if that fails, the fallback label is shown.
            try:
                timeDiffString = self._get_time_diff_lbl(res[3], epochTime)
            except Exception:
                timeDiffString = "Could not determine creation date"

            ret = None
            if self.showRetentionScores and int(res.id) in retsByNid:
                ret = retsByNid[int(res.id)]

            if ret is not None:
                retMark = "border-color: %s;" % (utility.misc._retToColor(ret))
                retInfo = """<div class='retMark' style='%s'>PR: %s</div> """ % (retMark, int(ret))
            else:
                retInfo = ""

            text = res.get_content()

            # hide fields that should not be shown
            if str(res.mid) in self.fields_to_hide_in_results:
                hidden  = self.fields_to_hide_in_results[str(res.mid)]
                text    = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in hidden])

            # hide cloze brackets if set in config
            if not self.show_clozes:
                text = utility.text.hide_cloze_brackets(text)

            # remove <div> tags if set in config
            if self.remove_divs and res.note_type != "user":
                text = utility.text.remove_divs(text)

            # escape backslashes, backticks and $ before the text is embedded
            text        = utility.text.clean_field_separators(text).replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
            text        = utility.text.try_hide_image_occlusion(text)
            # try to put fields that consist of a single image in their own line
            text        = utility.text.newline_before_images(text)

            if str(res.id) in self.edited:
                edited  = "<i class='fa fa-pencil ml-10 mr-5'></i> " + self._build_edited_info(self.edited[str(res.id)])
            else:
                edited  = ""

            template    = NOTE_TMPL_SIMPLE if res.note_type == "index" else NOTE_TMPL_SIAC_SIMPLE
            rendered.append(template.format(
                counter=counter+1,
                nid=res.id,
                edited=edited,
                mouseup="getSelectionText()" if search_on_selection else "",
                text=text,
                ret=retInfo,
                tags=utility.tags.build_tag_string(res.tags, tag_hover, maxLength = 25, maxCount = 2),
                creation="&nbsp;&#128336; " + timeDiffString))

        return "".join(rendered)
示例#6
0
    def _build_non_anki_note_html(self, text):
        """
        User's notes should be displayed in a way to visually distinguish between title, text and source.
        Also, text might need to be cut if is too long to reduce time needed for highlighting, extracting keywords, and rendering.
        """
        #trim very long texts:
        if len(text) > 5000:
            src_begin_index = text.rfind("\u001f")
            src = text[src_begin_index + 1:]
            title = text[:text.find("\u001f")]
            body = text[text.find("\u001f") + 1:src_begin_index][:5000]
            #there might be unclosed tags now, but parsing would be too much overhead, so simply remove div, a and span tags
            #there might be still problems with <p style='...'>
            body = utility.text.remove_tags(body, ["div", "span", "a"])
            last_open_bracket = body.rfind("<")
            if last_open_bracket >= len(body) - 500 or body.rfind(
                    " ") < len(body) - 500:
                last_close_bracket = body.rfind(">")
                if last_close_bracket < last_open_bracket:
                    body = body[:last_open_bracket]
            body += "<br></ul></b></i></em></span></p></p><p style='text-align: center; user-select: none;'><b>(Text was cut - too long to display)</b></p>"
        else:
            title = text.split("\u001f")[0]
            body = text.split("\u001f")[1]
            src = text.split("\u001f")[2]
        is_pdf = src is not None and src.lower().strip().endswith(".pdf")
        title = "%s<b>%s</b>%s" % (
            "<span class='siac-pdf-icon'></span>" if is_pdf else "",
            title if len(title) > 0 else "Unnamed Note",
            "<hr style='margin-bottom: 5px; border-top: dotted 2px;'>"
            if len(body.strip()) > 0 else "")
        if src is not None and len(src) > 0:
            src = "<br/><hr style='border-top: dotted 2px;'><i>Source: %s</i>" % (
                src)
        else:
            src = ""

        return title + body + src
示例#7
0
    def updateSingle(self, note):
        """
        Rerender a note in the results after it has been edited.

        To keep things simple, only the note's text and tags are replaced in the
        page; afterwards the note is stamped as "Edited just now".

        Args:
            note: indexable; 0 is the note id, 1 the text, 2 the tags. If a 5th
                element exists, it is used as key into self.fields_to_hide_in_results.
        """
        editor = self._editor
        if editor is None or editor.web is None:
            return

        tagStr = utility.tags.build_tag_string(note[2], self.gridView)
        nid, text = note[0], note[1]

        # hide fields that should not be shown
        hide_map = self.fields_to_hide_in_results
        if len(note) > 4 and str(note[4]) in hide_map:
            hidden = hide_map[str(note[4])]
            fields = text.split("\u001f")
            text = "\u001f".join([fld for pos, fld in enumerate(fields) if pos not in hidden])

        # the text ends up inside a JS template literal, so \, ` and $ are escaped
        text = utility.text.cleanFieldSeparators(text)
        text = text.replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text = utility.text.try_hide_image_occlusion(text)
        text = utility.text.newline_before_images(text)

        # find rendered note and replace text and tags
        replace_js = """
            document.getElementById('%s').innerHTML = `%s`;
            document.getElementById('tags-%s').innerHTML = `%s`;
        """ % (nid, text, nid, tagStr)
        self._editor.web.eval(replace_js)

        self._editor.web.eval("$('#cW-%s').find('.rankingLblAddInfo').hide();" % nid)
        self._editor.web.eval("fixRetMarkWidth(document.getElementById('cW-%s'));" % nid)
        # show the edited stamp and move an existing "suspended" label out of its way
        self._editor.web.eval(
            f"""$('#cW-{nid} .editedStamp').html(`&nbsp;&#128336; Edited just now`).show();
            if ($('#siac-susp-lbl-{nid}').length) {{
                $('#siac-susp-lbl-{nid}').css('left', '140px').show();
            }} 
        """)
示例#8
0
    def updateSingle(self, note):
        """
        Rerender a note in the results after it has been edited.

        To keep things simple, only the note's text and tags are replaced in the
        page; afterwards the note is stamped as "Edited just now".

        Args:
            note: indexable; 0 is the note id, 1 the text, 2 the tags. If a 5th
                element exists, it is used as key into self.fields_to_hide_in_results.
        """
        if self.editor is None or self.editor.web is None:
            return

        tagStr = self.buildTagString(note[2])
        nid = note[0]
        text = note[1]

        # hide fields that should not be shown
        if len(note) > 4 and str(note[4]) in self.fields_to_hide_in_results:
            hidden = self.fields_to_hide_in_results[str(note[4])]
            visible = []
            for pos, fld in enumerate(text.split("\u001f")):
                if pos not in hidden:
                    visible.append(fld)
            text = "\u001f".join(visible)

        # the text ends up inside a JS template literal, so \, ` and $ are escaped
        text = self._cleanFieldSeparators(text)
        text = text.replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text = self.tryHideImageOcclusion(text)
        text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)

        web = self.editor.web

        # find rendered note and replace text and tags
        web.eval("""
            document.getElementById('%s').innerHTML = `%s`;
            document.getElementById('tags-%s').innerHTML = `%s`;
        """ % (nid, text, nid, tagStr))

        follow_ups = (
            "$('#cW-%s').find('.rankingLblAddInfo').hide();",
            "fixRetMarkWidth(document.getElementById('cW-%s'));",
            "$('#cW-%s .editedStamp').html(`&nbsp;&#128336; Edited just now`).show();",
        )
        for script in follow_ups:
            self.editor.web.eval(script % nid)
示例#9
0
    def update_single(self, note):
        """
        Rerender a note in the results after it has been edited.

        To keep things simple, only the note's text and tags are replaced in the
        page; afterwards the note's "edited" display is set to "Edited just now".

        Args:
            note: indexable; 0 is the note id, 1 the text, 2 the tags. If a 5th
                element exists, it is used as key into self.fields_to_hide_in_results.
        """
        editor = self._editor
        if editor is None or editor.web is None:
            return

        nid     = note[0]
        tagStr  = utility.tags.build_tag_string(note[2], self.gridView)
        text    = note[1]

        # hide fields that should not be shown
        model_key = str(note[4]) if len(note) > 4 else None
        if model_key is not None and model_key in self.fields_to_hide_in_results:
            hidden  = self.fields_to_hide_in_results[model_key]
            text    = "\u001f".join([fld for pos, fld in enumerate(text.split("\u001f")) if pos not in hidden])

        # the text ends up inside a JS template literal, so \, ` and $ are escaped
        text    = utility.text.clean_field_separators(text)
        text    = text.replace("\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
        text    = utility.text.try_hide_image_occlusion(text)

        # hide clozes if set in config
        if not self.show_clozes:
            text    = utility.text.hide_cloze_brackets(text)

        text    = utility.text.newline_before_images(text)

        # remove <div> tags if set in config
        if self.remove_divs:
            text    = utility.text.remove_divs(text, " ")

        # find rendered note and replace text and tags
        replace_js = """
            document.getElementById('siac-inner-card-%s').innerHTML = `%s`;
            document.getElementById('tags-%s').innerHTML = `%s`;
        """ % (nid, text, nid, tagStr)
        self._editor.web.eval(replace_js)

        stamp_js = f"""$('#siac-edited-dsp-{nid}').html(`<i class='fa fa-pencil mr-5 ml-10'></i> Edited just now`); """
        self._editor.web.eval(stamp_js)
示例#10
0
    def print_search_results(self,
                             notes,
                             stamp,
                             editor=None,
                             logging=False,
                             printTimingInfo=False,
                             page=1,
                             query_set=None,
                             is_queue=False,
                             is_cached=False):
        """
        Render one page of search results (50 notes per page) into the search results div.

        The html of the requested page is built and handed to the page through
        `setSearchResults(...)`. Afterwards, separate scripts are sent for
        (1) notes whose highlighting was deferred, (2) "SUSPENDED" labels and
        (3) the reading progress of pdf notes.

        Args:
            notes: the complete result list; entries are read through .id, .mid, .did,
                .tags, .text, .note_type, .position, get_content(), is_pdf() and is_in_queue().
            stamp: time stamp of the search that produced the results. If it is not None
                and differs from self.latest, nothing is rendered.
            editor: forwarded to self._js.
            logging: if True, progress is written via log().
            printTimingInfo: if True, the sidebar offers a link to the last timing info.
            page: 1-based number of the page to render.
            query_set: terms to highlight, or None for no highlighting.
            is_queue: if True, the notes do not get the getSelectionText() mouseup handler.
            is_cached: if True, the call is not appended to self.previous_calls.

        Returns:
            None if the stamp is outdated, otherwise the tuple
            (highlight_total * 1000, build_user_note_total). Both totals are measured
            with time.time(); only the first one is multiplied by 1000.
        """

        if logging:
            log("Entering print_search_results")
            log("Length (searchResults): " + str(len(notes)))

        # results of an outdated search are dropped
        if stamp is not None and stamp != self.latest:
            return

        # remember the call, only the 11 most recent ones are kept
        if not is_cached and len(notes) > 0:
            self.previous_calls.append([
                notes, None, editor, logging, printTimingInfo, page, query_set,
                is_queue
            ])
            if len(self.previous_calls) > 11:
                self.previous_calls.pop(0)

        rendered_notes = []
        allText = ""
        tags = []
        epochTime = int(time.time() * 1000)
        self.last_had_timing_info = printTimingInfo

        if notes is not None and len(notes) > 0:
            self.lastResults = notes
            self.last_query_set = query_set

        page_offset = (page - 1) * 50
        searchResults = notes[page_offset:page * 50]
        nids = [r.id for r in searchResults]

        if self.showRetentionScores:
            retsByNid = getRetentions(nids)

        # time stamps to collect information about rendering performance
        start = time.time()
        highlight_total = 0.0
        build_user_note_total = 0.0

        # only the first notes of a page are highlighted right away, the text of the
        # others is collected here and highlighted after the page has been rendered
        remaining_to_highlight = {}
        highlight_boundary = 15 if self.gridView else 10

        # for better performance, collect all notes that are .pdfs, and
        # query their reading progress after they have been rendered
        pdfs = []

        check_for_suspended = []

        for idx, res in enumerate(searchResults):
            nid = res.id
            # position in the complete result list; idx is the position on this page
            counter = idx + page_offset
            try:
                timeDiffString = self._getTimeDifferenceString(nid, epochTime)
            except Exception:
                if logging:
                    log("Failed to determine creation date: " + str(nid))
                timeDiffString = "Could not determine creation date"

            ret = None
            if self.showRetentionScores and int(nid) in retsByNid:
                ret = retsByNid[int(nid)]

            if ret is not None:
                retMark = "background: %s; color: black;" % (
                    utility.misc._retToColor(ret))
                if str(nid) in self.edited:
                    retMark = ''.join((retMark, "max-width: 20px;"))
                retInfo = """<div class='retMark' style='%s'>%s</div>""" % (
                    retMark, int(ret))
            else:
                retInfo = ""

            # non-anki notes should be displayed differently, we distinguish between title, text and source here
            # confusing: 'source' on notes from the index means the original note content (without stopwords removed etc.),
            # on SiacNotes, it means the source field.
            build_user_note_start = time.time()
            text = res.get_content()
            progress = ""
            pdf_class = ""
            if res.note_type == "user" and res.is_pdf():
                pdfs.append(nid)
                # placeholder progress bar, replaced once the pdf info has been queried
                p_html = "<div class='siac-prog-sq'></div>" * 10
                progress = f"<div id='ptmp-{nid}' class='siac-prog-tmp'>{p_html} <span>&nbsp;0 / ?</span></div>"
                pdf_class = "pdf"
            elif res.note_type == "index" and res.did > 0:
                check_for_suspended.append(res.id)

            build_user_note_total += time.time() - build_user_note_start

            # hide fields that should not be shown
            if str(res.mid) in self.fields_to_hide_in_results:
                hidden = self.fields_to_hide_in_results[str(res.mid)]
                text = "\u001f".join([
                    spl for i, spl in enumerate(text.split("\u001f"))
                    if i not in hidden
                ])

            # remove double fields separators
            text = utility.text.cleanFieldSeparators(text).replace(
                "\\", "\\\\")

            # try to remove image occlusion fields
            text = utility.text.try_hide_image_occlusion(text)

            # try to put fields that consist of a single image in their own line
            text = utility.text.newline_before_images(text)

            # remove <div> tags if set in config
            if self.remove_divs and res.note_type != "user":
                text = utility.text.remove_divs(text, " ")

            # highlight now, or remember the text to highlight it later
            highlight_start = time.time()
            if query_set is not None:
                if idx < highlight_boundary:
                    text = utility.text.mark_highlights(text, query_set)
                else:
                    remaining_to_highlight[nid] = text
            highlight_total += time.time() - highlight_start

            gridclass = "grid" if self.gridView else ""
            if self.gridView and len(text) < 200:
                if self.scale < 0.8:
                    gridclass = ' '.join((gridclass, "grid-smaller"))
                else:
                    gridclass = ' '.join((gridclass, "grid-small"))
            elif self.gridView and self.scale < 0.8:
                gridclass = ' '.join((gridclass, "grid-small"))

            elif self.gridView and len(text) > 700 and self.scale > 0.8:
                gridclass = ' '.join((gridclass, "grid-large"))

            if self.scale != 1.0:
                gridclass = ' '.join([
                    gridclass,
                    "siac-sc-%s" % str(self.scale).replace(".", "-")
                ])

            if str(nid) in self.edited:
                edited_info = "&nbsp;&#128336; " + self._buildEditedInfo(
                    self.edited[str(nid)])
            else:
                edited_info = ""

            # use either the template for addon's notes or the normal
            if res.note_type == "user":
                newNote = noteTemplateUserNote.format(
                    grid_class=gridclass,
                    counter=counter + 1,
                    nid=nid,
                    creation="&nbsp;&#128336; " + timeDiffString,
                    edited=edited_info,
                    mouseup="getSelectionText()" if not is_queue else "",
                    text=text,
                    tags=utility.tags.build_tag_string(res.tags,
                                                       self.gridView),
                    queue=": Q-%s&nbsp;" %
                    (res.position + 1) if res.is_in_queue() else "",
                    progress=progress,
                    pdf_class=pdf_class,
                    ret=retInfo)
            else:
                newNote = noteTemplate.format(
                    grid_class=gridclass,
                    counter=counter + 1,
                    nid=nid,
                    creation="&nbsp;&#128336; " + timeDiffString,
                    edited=edited_info,
                    mouseup="getSelectionText()" if not is_queue else "",
                    text=text,
                    tags=utility.tags.build_tag_string(res.tags,
                                                       self.gridView),
                    ret=retInfo)

            rendered_notes.append(newNote)
            tags = self._addToTags(tags, res.tags)
            # only the first 20 notes of the page contribute to the keyword text
            if idx < 20:
                # todo: title for user notes
                allText = f"{allText} {res.text[:5000]}"

        tags.sort()
        # the html is embedded in a JS template literal, so ` and $ must not appear in it
        html = "".join(rendered_notes).replace("`", "&#96;").replace("$", "&#36;")
        pageMax = math.ceil(len(notes) / 50.0)

        index = get_index()
        if index is not None and index.lastResDict is not None:
            index.lastResDict["time-html"] = int(
                (time.time() - start) * 1000)
            index.lastResDict["time-html-highlighting"] = int(
                highlight_total * 1000)
            index.lastResDict["time-html-build-user-note"] = int(
                build_user_note_total * 1000)

        if stamp is None and self.last_took is not None:
            took = self.last_took
            stamp = -1
        elif stamp is not None:
            took = utility.misc.get_milisec_stamp() - stamp
            self.last_took = took
        else:
            took = "?"
        timing = "true" if printTimingInfo else "false"

        if not self.hideSidebar:
            infoMap = {
                "Took":
                "<b>%s</b> ms %s" %
                (took,
                 "&nbsp;<b style='cursor: pointer' onclick='pycmd(`siac-last-timing`)'>&#9432;</b>"
                 if printTimingInfo else ""),
                "Found":
                "<b>%s</b> notes" % (len(notes) if len(notes) > 0 else
                                     "<span style='color: red;'>0</span>")
            }
            info = self.build_info_table(infoMap, tags, allText)
            cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s);" % (
                html, info[0].replace("`", "&#96;"), json.dumps(info[1]), page,
                pageMax, len(notes), len(self.previous_calls), stamp, timing)
        else:
            cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s);" % (
                html, page, pageMax, len(notes), len(
                    self.previous_calls), stamp, timing)
        cmd = f"{cmd}updateSwitchBtn({len(notes)});"

        self._js(cmd, editor)

        if len(remaining_to_highlight) > 0:
            scripts = []
            for nid, text in remaining_to_highlight.items():
                # same escaping as for the page html above: the text is placed inside
                # a JS template literal, a raw ` or ${ would break out of it
                highlighted = utility.text.mark_highlights(
                    text, query_set).replace("`", "&#96;").replace("$", "&#36;")
                scripts.append(
                    "document.getElementById('%s').innerHTML = `%s`;" %
                    (nid, highlighted))
            self._js(''.join(scripts), editor)

        if len(check_for_suspended) > 0:
            susp = get_suspended(check_for_suspended)
            if len(susp) > 0:
                cmd = ""
                for nid in susp:
                    cmd = f"{cmd}$('#cW-{nid}').after(`<span id='siac-susp-lbl-{nid}' onclick='pycmd(\"siac-unsuspend-modal {nid}\")' class='siac-susp-lbl'>SUSPENDED</span>`);"
                    if str(nid) in self.edited:
                        cmd = f"{cmd} $('#siac-susp-lbl-{nid}').css('left', '140px');"
                self._js(cmd, editor)

        if len(pdfs) > 0:
            pdf_info_list = get_pdf_info(pdfs)

            if pdf_info_list is not None and len(pdf_info_list) > 0:
                scripts = []
                for i in pdf_info_list:
                    # i[0]: note id, i[1] / i[2]: the two numbers shown as "x / y";
                    # a total of 0 would otherwise raise a ZeroDivisionError
                    perc = int(i[1] * 10.0 / i[2]) if i[2] else 0
                    filled = min(max(perc, 0), 10)
                    prog_bar = ("<div class='siac-prog-sq-filled'></div>" * filled
                                + "<div class='siac-prog-sq'></div>" * (10 - filled))
                    scripts.append(
                        "document.querySelector('#ptmp-%s').innerHTML = `%s &nbsp;<span>%s / %s</span>`;"
                        % (i[0], prog_bar, i[1], i[2]))
                self._js(''.join(scripts), editor)

        return (highlight_total * 1000, build_user_note_total)
示例#11
0
    def searchProc(self, text, decks, only_user_notes, print_mode):
        """
        Run a full text search for the given text on the index database.

        The text is cleaned (self.clean), expanded by synonyms and turned into an
        OR-query over its tokens. Matching rows are filtered by pinned notes and
        selected decks and converted into SiacNote / IndexNote objects.

        Args:
            text: the raw query text.
            decks: list of deck ids (as strings) to search in; "-1" selects all decks.
                The list is not modified.
            only_user_notes: if True, only rows with mid '-1' are searched.
            print_mode: "default" or "pdf"; decides what happens if the query is
                empty after cleaning.

        Returns:
            A dict with the key "results" (list of notes, at most self.limit) and, if
            the database was queried, timing/debug entries. If the query is empty
            after cleaning: None, or a message string in "default" mode when the
            sidebar is hidden.
        """
        resDict                     = {}
        start                       = time.time()
        orig                        = text
        text                        = self.clean(text)
        resDict["time-stopwords"]   = int((time.time() - start) * 1000)
        self.lastSearch             = (text, decks, "default", orig)

        if self.logging:
            log("\nFTS index - Received query: " + text)
            log("Decks (arg): " + str(decks))
            log("Self.pinned: " + str(self.pinned))
            log("Self.limit: "  + str(self.limit))

        if len(text) == 0:
            if print_mode == "default":
                UI.empty_result("Query was empty after cleaning.<br/><br/><b>Query:</b> <i>%s</i>" % utility.text.trim_if_longer_than(orig, 100).replace("\u001f", "").replace("`", "&#96;"))
                if mw.addonManager.getConfig(__name__)["hideSidebar"]:
                    return "Found 0 notes. Query was empty after cleaning."
                return None
            elif print_mode == "pdf":
                return None

        start                       = time.time()
        text                        = utility.text.expand_by_synonyms(text, self.synonyms)
        resDict["time-synonyms"]    = int((time.time() - start) * 1000)
        resDict["query"]            = text

        if utility.text.text_too_small(text):
            if self.logging:
                log("Returning - Text was < 2 chars: " + text)
            return { "results" : [] }

        tokens                      = text.split(" ")
        # long queries are deduplicated
        if len(tokens) > 10:
            tokens                  = set(tokens)
        # an upper case OR inside a term would be read as the query operator
        terms                       = [s.strip().replace("OR", "or") for s in tokens if not utility.text.text_too_small(s)]
        if self.type == "SQLite FTS5":
            query = " OR ".join(["tags:" + t for t in terms])
            query += " OR " + " OR ".join(["text:" + t for t in terms])
        else:
            query = " OR ".join(terms)
        if len(query) == 0 or query == " OR ":
            if self.logging:
                log("Returning. Query was: " + query)
            return { "results" : [] }

        allDecks                    = "-1" in decks
        # "-1" is added so that rows with did -1 pass the deck filter. This is done
        # on a copy: the caller's list is also stored in self.lastSearch, and a "-1"
        # left in there would switch the deck filter off if the list is used again.
        search_decks                = list(decks) + ["-1"]
        resDict["decks"]            = search_decks

        rList                       = list()
        user_note_filter            = "AND mid='-1'" if only_user_notes else ""

        # the match expression is passed as a bound parameter, not pasted into the statement
        if self.type == "SQLite FTS5":
            dbStr = "select nid, text, tags, did, source, bm25(notes) as score, mid, refs from notes where notes match ? %s order by score" % user_note_filter
        else:
            dbStr = "select nid, text, tags, did, source, simple_rank(matchinfo(notes)) as score, mid, refs from notes where text match ? %s order by score desc" % user_note_filter

        conn                        = sqlite3.connect(self.dir + "search-data.db")
        try:
            if self.type != "SQLite FTS5":
                conn.create_function("simple_rank", 1, simple_rank)
            try:
                start                   = time.time()
                res                     = conn.execute(dbStr, (query,)).fetchall()
                resDict["time-query"]   = int((time.time() - start) * 1000)
            except Exception as e:
                # a failing query is reported and treated as "no results"
                print("Executing match query threw exception: " + str(e))
                res                     = []
        finally:
            conn.close()

        if self.logging:
            log("dbStr was: " + dbStr)
            log("Result length of db query: " + str(len(res)))

        resDict["highlighting"] = self.highlighting

        # row layout: 0 nid, 3 did, 6 mid (see the select statements above)
        c                           = 0
        for r in res:
            if str(r[0]) not in self.pinned and (allDecks or str(r[3]) in search_decks):
                # rows with mid -1 are the add-on's own notes
                if str(r[6]) == "-1":
                    rList.append(SiacNote.from_index(r))
                else:
                    rList.append(IndexNote(r))
                c += 1
                if c >= self.limit:
                    break

        if self.logging:
            log("Query was: " + query)
            log("Result length (after removing pinned and unselected decks): " + str(len(rList)))

        resDict["results"]          = rList[:min(self.limit, len(rList))]
        self.lastResDict            = resDict

        return resDict
示例#12
0
    def get_result_html_simple(self,
                               db_list,
                               tag_hover=True,
                               search_on_selection=True):
        """
        Build the html for a list of result rows, using the "simple" note templates.

        Args:
            db_list: rows to render. The code reads position 0 as note text, 1 as tags,
                2 as deck id ("-1" marks a non-anki note), 3 as note id and, if
                present, 5 as key into self.fields_to_hide_in_results.
            tag_hover: forwarded to self.buildTagString.
            search_on_selection: if True, "getSelectionText()" is passed to the
                template, otherwise an empty string.

        Returns:
            The concatenated html of all rendered rows ("" for an empty list).
        """
        epochTime = int(time.time() * 1000)
        nids = [r[3] for r in db_list]
        # retention scores are only looked up if the feature is switched on
        if self.showRetentionScores:
            retsByNid = getRetentions(nids)

        rendered = []
        for counter, res in enumerate(db_list):
            nid = res[3]
            try:
                timeDiffString = self._getTimeDifferenceString(nid, epochTime)
            except Exception:
                timeDiffString = "Could not determine creation date"

            ret = None
            if self.showRetentionScores and int(nid) in retsByNid:
                ret = retsByNid[int(nid)]

            if ret is not None:
                retMark = "background: %s; color: black;" % (
                    self._retToColor(ret))
                if str(nid) in self.edited:
                    retMark += "max-width: 20px;"
                retInfo = """<div class='retMark' style='%s'>%s</div>
                                """ % (retMark, int(ret))
            else:
                retInfo = ""

            text = res[0]
            is_user_note = str(res[2]) == "-1"

            # non-anki notes should be displayed differently, we distinguish between title, text and source here
            if is_user_note:
                text = self._build_non_anki_note_html(text)

            # hide fields that should not be shown
            if len(res) > 5 and str(res[5]) in self.fields_to_hide_in_results:
                hidden = self.fields_to_hide_in_results[str(res[5])]
                text = "\u001f".join([
                    spl for i, spl in enumerate(text.split("\u001f"))
                    if i not in hidden
                ])

            # remove <div> tags if set in config
            if self.remove_divs:
                text = utility.text.remove_divs(text)

            # escape backslashes, backticks and $ before the text is embedded
            text = self._cleanFieldSeparators(text).replace(
                "\\", "\\\\").replace("`", "\\`").replace("$", "&#36;")
            text = self.tryHideImageOcclusion(text)
            # try to put fields that consist of a single image in their own line
            text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)

            if str(nid) in self.edited:
                edited_info = "&nbsp;&#128336; " + self._buildEditedInfo(
                    self.edited[str(nid)])
            else:
                edited_info = ""

            template = self.noteTemplateUserNoteSimple if is_user_note else self.noteTemplateSimple
            # the templates are filled by position, the order below must not change
            rendered.append(template % (
                counter + 1,
                "&nbsp;&#128336; " + timeDiffString,
                edited_info,
                retInfo,
                nid,
                nid,
                "getSelectionText()" if search_on_selection else "",
                nid,
                nid,
                text,
                self.buildTagString(
                    res[1], tag_hover, maxLength=25, maxCount=2),
                nid))

        return "".join(rendered)
示例#13
0
    def printSearchResults(self,
                           db_list,
                           stamp,
                           editor=None,
                           logging=False,
                           printTimingInfo=False,
                           page=1,
                           query_set=None,
                           is_queue=False):
        """
        Builds the html for one page (50 notes) of the search results and sends it to the web view.

        Args:
        db_list - complete list of result tuples, see SearchIndex.search(). None is treated like an empty list.
        db_list[i].0: note text
        db_list[i].1: tags
        db_list[i].2: did ("-1" marks a note that is rendered with the user note template)
        db_list[i].3: nid
        db_list[i].4: score (not used currently)
        db_list[i].5: mid
        db_list[i].6: refs (not used currently)
        db_list[i].7: position in queue (only present if in queue)
        stamp - millisecond stamp of the search; if it is not None and differs from self.latest, nothing is rendered
        editor - editor whose web view receives the js, self.editor is used as fallback
        logging - if True, progress messages are written to the log
        printTimingInfo - if True, the "Took" entry of the sidebar info gets a clickable info symbol
        page - 1-based number of the page to render
        query_set - terms that are passed to self._markHighlights, None disables highlighting
        is_queue - if True, "getSelectionText()" is not passed to the user note template

        Returns:
        None, the result is pushed to the web view.
        """
        if stamp is not None and stamp != self.latest:
            if logging:
                log("PrintSearchResults: Aborting because stamp != latest")
            return

        # a missing result list is rendered like an empty one instead of failing on len() / slicing
        if db_list is None:
            db_list = []

        if logging:
            log("Entering printSearchResults")
            log("Length (searchResults): " + str(len(db_list)))

        def _eval_js(js, announce=False):
            """ Evaluates js in the web view of the given editor, or in self.editor's if that one is not usable. """
            web = None
            if editor is not None and editor.web is not None:
                web = editor.web
            elif self.editor is not None and self.editor.web is not None:
                web = self.editor.web
            if web is None:
                return
            if announce and logging:
                log("printing the result html...")
            web.eval(js)

        page_offset = (page - 1) * 50
        html_parts = []
        allText = ""
        tags = []
        epochTime = int(time.time() * 1000)
        self.last_had_timing_info = printTimingInfo

        if len(db_list) > 0:
            self.lastResults = db_list
            self.last_query_set = query_set

        searchResults = db_list[page_offset:page * 50]
        nids = [r[3] for r in searchResults]

        if self.showRetentionScores:
            retsByNid = getRetentions(nids)

        # various time stamps to collect information about rendering performance
        start = time.time()
        highlight_total = 0.0
        build_user_note_total = 0.0

        # notes beyond the boundary are rendered unhighlighted first and highlighted in a second js call
        remaining_to_highlight = {}
        highlight_boundary = 15 if self.gridView else 10

        # for better performance, collect all notes that are .pdfs, and
        # query their reading progress after they have been rendered
        pdfs = []

        for idx, res in enumerate(searchResults):
            counter = idx + page_offset
            nid = res[3]
            is_user_note = str(res[2]) == "-1"
            try:
                timeDiffString = self._getTimeDifferenceString(nid, epochTime)
            except Exception:
                if logging:
                    log("Failed to determine creation date: " + str(nid))
                timeDiffString = "Could not determine creation date"
            ret = retsByNid[int(nid)] if self.showRetentionScores and int(
                nid) in retsByNid else None

            if ret is not None:
                retMark = "background: %s; color: black;" % (
                    self._retToColor(ret))
                if str(nid) in self.edited:
                    retMark += "max-width: 20px;"
                retInfo = """<div class='retMark' style='%s'>%s</div>
                                """ % (retMark, int(ret))
            else:
                retInfo = ""

            text = res[0]

            #non-anki notes should be displayed differently, we distinguish between title, text and source here
            build_user_note_start = time.time()
            if is_user_note:
                fields = text.split("\u001f")
                # the source is the third field, notes with fewer fields simply have no source
                src = fields[2] if len(fields) > 2 else ""
                text = self._build_non_anki_note_html(text)
                if src.endswith(".pdf"):
                    pdfs.append(nid)
            build_user_note_total += time.time() - build_user_note_start

            # hide fields that should not be shown
            if len(res) > 5 and str(res[5]) in self.fields_to_hide_in_results:
                hidden = self.fields_to_hide_in_results[str(res[5])]
                text = "\u001f".join([
                    spl for i, spl in enumerate(text.split("\u001f"))
                    if i not in hidden
                ])

            #remove double fields separators
            text = self._cleanFieldSeparators(text).replace("\\", "\\\\")

            #try to remove image occlusion fields
            text = self.tryHideImageOcclusion(text)

            #try to put fields that consist of a single image in their own line
            text = self.IMG_FLD.sub("|</span><br/>\\1<br/>\\2", text)

            #remove <div> tags if set in config
            if self.remove_divs:
                text = utility.text.remove_divs(text, " ")

            #highlight
            highlight_start = time.time()
            if query_set is not None:
                if idx < highlight_boundary:
                    text = self._markHighlights(text, query_set)
                else:
                    remaining_to_highlight[nid] = text
            highlight_total += time.time() - highlight_start

            gridclass = "grid" if self.gridView else ""
            if self.gridView and len(text) < 200:
                if self.scale < 0.8:
                    gridclass = ' '.join((gridclass, "grid-smaller"))
                else:
                    gridclass = ' '.join((gridclass, "grid-small"))
            elif self.gridView and self.scale < 0.8:
                gridclass = ' '.join((gridclass, "grid-small"))
            elif self.gridView and len(text) > 700 and self.scale > 0.8:
                gridclass = ' '.join((gridclass, "grid-large"))

            if self.scale != 1.0:
                gridclass = ' '.join([
                    gridclass,
                    "siac-sc-%s" % str(self.scale).replace(".", "-")
                ])

            edited = ""
            if str(nid) in self.edited:
                edited = "&nbsp;&#128336; " + self._buildEditedInfo(
                    self.edited[str(nid)])
            tag_html = self.buildTagString(res[1])

            # both templates start with the same seven values and repeat the nid for their element ids
            head = (gridclass, counter + 1, nid, counter + 1,
                    "&nbsp;&#128336; " + timeDiffString, edited, retInfo)

            # use either the template for addon's notes or the normal
            if is_user_note:
                mouseup = "getSelectionText()" if not is_queue else ""
                queue_lbl = ""
                if len(res) >= 8 and res[7] is not None:
                    queue_lbl = ": Q-%s&nbsp;" % (res[7] + 1)
                newNote = self.noteTemplateUserNote % (
                    head + (nid, ) * 8 + (mouseup, ) + (nid, ) * 4 +
                    (text, nid, tag_html, nid, queue_lbl))
            else:
                newNote = self.noteTemplate % (head + (nid, ) * 11 +
                                               (text, nid, tag_html, nid))

            html_parts.append(newNote)
            tags = self._addToTags(tags, res[1])
            # only the first 20 notes of the page contribute to the text passed to buildInfoTable
            if idx < 20:
                allText += " " + res[0][:5000]

        tags.sort()
        # the html is embedded in a js template literal, so backticks and $ have to be masked
        html = "".join(html_parts).replace("`", "&#96;").replace("$", "&#36;")
        pageMax = math.ceil(len(db_list) / 50.0)
        if get_index().lastResDict is not None:
            get_index().lastResDict["time-html"] = int(
                (time.time() - start) * 1000)
            get_index().lastResDict["time-html-highlighting"] = int(
                highlight_total * 1000)
            get_index().lastResDict["time-html-build-user-note"] = int(
                build_user_note_total * 1000)

        if stamp is None and self.last_took is not None:
            took = self.last_took
        elif stamp is not None:
            took = utility.misc.get_milisec_stamp() - stamp
            self.last_took = took
        else:
            took = "?"

        if not self.hideSidebar:
            infoMap = {
                "Took":
                "<b>%s</b> ms %s" %
                (took,
                 "&nbsp;<b style='cursor: pointer' onclick='pycmd(`lastTiming`)'>&#9432;</b>"
                 if printTimingInfo else ""),
                "Found":
                "<b>%s</b> notes" % (len(db_list) if len(db_list) > 0 else
                                     "<span style='color: red;'>0</span>")
            }
            info = self.buildInfoTable(infoMap, tags, allText)
            cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s);" % (
                html, info[0].replace("`", "&#96;"), json.dumps(
                    info[1]), page, pageMax, len(db_list))
        else:
            cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s);" % (
                html, page, pageMax, len(db_list))
        cmd += "updateSwitchBtn(%s)" % len(searchResults)
        _eval_js(cmd, announce=True)

        if len(remaining_to_highlight) > 0:
            cmd = "".join(
                "document.getElementById('%s').innerHTML = `%s`;" %
                (nid, self._markHighlights(text, query_set))
                for nid, text in remaining_to_highlight.items())
            _eval_js(cmd)

        if len(pdfs) > 0:
            pdf_info_list = get_pdf_info(pdfs)
            if pdf_info_list is not None and len(pdf_info_list) > 0:
                js_parts = []
                for i in pdf_info_list:
                    # i[1] / i[2] are displayed as "<i[1]> / <i[2]>"; a pdf without pages gets an empty bar
                    perc = int(i[1] * 10.0 / i[2]) if i[2] else 0
                    filled = min(max(perc, 0), 10)
                    prog_bar = ("<div class='siac-prog-sq-filled'></div>" * filled
                                + "<div class='siac-prog-sq'></div>" * (10 - filled))
                    js_parts.append(
                        "document.getElementById('%s').innerHTML += `<br><div style='margin-top: 5px;'>%s &nbsp;%s / %s</div>`;"
                        % (i[0], prog_bar, i[1], i[2]))
                _eval_js("".join(js_parts))
示例#14
0
    def print_search_results(self, notes, stamp, editor=None, timing_info=False, page=1, query_set=None, is_cached=False):
        """
        Builds the html for one page of search results and sends it to the web view via self._js.

        A page holds 50 notes; meta notes at the beginning of the list are rendered on page 1 in addition to those.

        Args:
        notes - complete list of note objects to display, None is treated like an empty list
        stamp - millisecond stamp of the search; if it is not None and differs from self.latest, nothing is rendered
        editor - passed through to self._js
        timing_info - if True, the "Took" entry of the sidebar info gets a clickable info symbol
        page - 1-based number of the page to render, falls back to the last non-empty page if it does not exist anymore
        query_set - terms that are passed to utility.text.mark_highlights, None disables highlighting
        is_cached - if True, the call is not appended to self.previous_calls

        Returns:
        None if the stamp is outdated, else a tuple (highlight_total * 1000, build_user_note_total).
        """

        if stamp is not None and stamp != self.latest:
            return

        # a missing result list is rendered like an empty one instead of failing on len() / slicing
        if notes is None:
            notes = []

        # meta notes at the beginning of the list do not count towards the 50 notes of a page
        meta_notes_cnt              = 0
        while meta_notes_cnt < len(notes) and notes[meta_notes_cnt].note_type == "user" and notes[meta_notes_cnt].is_meta_note():
            meta_notes_cnt          += 1

        # if we were on e.g. on page 2 which contains exactly one note (nr. 51 of 51 search results), and deleted that note, the
        # refresh call would still be to rerender page 2 with the updated search results,
        # but page 2 would not exist anymore, so we have to check for that:
        # (a page > 1 starts at index (page - 1) * 50 + meta_notes_cnt, it is empty if that index is not in the list)
        while page > 1 and (page - 1) * 50 + meta_notes_cnt >= len(notes):
            page -= 1

        # if this is true, avoid scrolling to the top of the search results again
        is_rerender                 = False

        if not is_cached and len(notes) > 0:

            # roughly check if current call equals the last one, to set is_rerender to True
            if len(self.previous_calls) > 0:
                last_call = self.previous_calls[-1]
                nids = [n.id for n in last_call[0][:30]]
                if query_set == last_call[5] and page == last_call[4] and nids == [n.id for n in notes[:30]]:
                    is_rerender = True

            # cache all calls to be able to repeat them
            self.previous_calls.append([notes, None, editor, timing_info, page, query_set])

            if len(self.previous_calls) > 11:
                self.previous_calls.pop(0)

        html_parts                  = []
        allText                     = ""
        tags                        = []
        epochTime                   = int(time.time() * 1000)
        self.last_had_timing_info   = timing_info

        if len(notes) > 0:
            self.lastResults        = notes
            self.last_query_set     = query_set

        searchResults               = notes[(page - 1) * 50 + min(page - 1, 1) * meta_notes_cnt: page * 50 + meta_notes_cnt]
        nids                        = [r.id for r in searchResults]

        if self.showRetentionScores:
            retsByNid               = getRetentions(nids)

        # various time stamps to collect information about rendering performance
        start                       = time.time()
        highlight_total             = 0.0
        build_user_note_total       = 0.0

        # notes beyond the boundary are rendered unhighlighted first and highlighted in a second js call
        remaining_to_highlight      = {}
        highlight_boundary          = 15 if self.gridView else 10

        # for better performance, collect all notes that are .pdfs, and
        # query their reading progress after they have been rendered
        pdfs                        = []

        check_for_suspended         = []

        meta_card_counter           = 0
        for idx, res in enumerate(searchResults):
            nid     = res.id
            counter = idx + (page - 1) * 50
            try:
                timeDiffString = self._get_time_diff_lbl(nid, epochTime)
            except Exception:
                timeDiffString = "Could not determine creation date"
            ret = retsByNid[int(nid)] if self.showRetentionScores and int(nid) in retsByNid else None

            if ret is not None:
                retMark = "border-color: %s;" % (utility.misc._retToColor(ret))
                retInfo = """<div class='retMark' style='%s'>Pass Rate: %s</div>""" % (retMark, int(ret))
            else:
                retInfo = ""

            # non-anki notes should be displayed differently, we distinguish between title, text and source here
            # confusing: 'source' on notes from the index means the original note content (without stopwords removed etc.),
            # on SiacNotes, it means the source field.
            build_user_note_start   = time.time()
            text                    = res.get_content()
            progress                = ""
            pdf_class               = ""
            if res.note_type == "user":
                icon = "book"
                if res.is_pdf():
                    pdfs.append(nid)
                    # placeholder progress bar, replaced once the pdf info has been queried
                    p_html              = "<div class='siac-prog-sq'></div>" * 10
                    progress            = f"<div id='ptmp-{nid}' class='siac-prog-tmp'>{p_html} <span>&nbsp;0 / ?</span></div><div style='display: inline-block;' id='siac-ex-tmp-{nid}'></div>"
                    pdf_class           = "pdf"
                elif int(res.id) < 0:
                    # meta card
                    pdf_class           = "meta"
                elif res.is_yt():
                    icon = "film"
                elif res.is_file():
                    icon = "external-link"
            elif res.note_type == "index" and res.did and res.did > 0:
                check_for_suspended.append(res.id)

            build_user_note_total   += time.time() - build_user_note_start

            # hide fields that should not be shown
            if str(res.mid) in self.fields_to_hide_in_results:
                hidden              = self.fields_to_hide_in_results[str(res.mid)]
                text                = "\u001f".join([spl for i, spl in enumerate(text.split("\u001f")) if i not in hidden])

            # remove double fields separators
            text                    = utility.text.clean_field_separators(text).replace("\\", "\\\\")

            # try to remove image occlusion fields
            text                    = utility.text.try_hide_image_occlusion(text)

            # if set in config, try to remove cloze brackets
            if not self.show_clozes:
                text                = utility.text.hide_cloze_brackets(text)

            # try to put fields that consist of a single image in their own line
            text                    = utility.text.newline_before_images(text)

            #remove <div> tags if set in config
            if self.remove_divs and res.note_type != "user":
                text                = utility.text.remove_divs(text, " ")

            #highlight
            highlight_start         = time.time()
            if query_set is not None:
                if idx < highlight_boundary:
                    text            = utility.text.mark_highlights(text, query_set)
                else:
                    remaining_to_highlight[nid] = text
            highlight_total += time.time() - highlight_start

            gridclass = "grid" if self.gridView else ""

            if self.scale != 1.0:
                gridclass = ' '.join([gridclass, "siac-sc-%s" % str(self.scale).replace(".", "-")])

            edited = ""
            if str(nid) in self.edited:
                edited = "<i class='fa fa-pencil ml-10 mr-5'></i> " + self._build_edited_info(self.edited[str(nid)])

            # use either the template for addon's notes or the normal
            if res.note_type == "user":

                template    = NOTE_TMPL_SIAC
                if res.is_meta_note():
                    template            = NOTE_TMPL_META
                    meta_card_counter   += 1
                newNote     = template.format(
                    grid_class  = gridclass,
                    counter     = counter + 1 - meta_card_counter,
                    nid         = nid,
                    creation    = "&nbsp;&#128336; " + timeDiffString,
                    edited      = edited,
                    mouseup     = "getSelectionText()",
                    text        = text,
                    tags        = utility.tags.build_tag_string(res.tags, self.gridView),
                    queue       = ": Q-%s&nbsp;" % (res.position + 1) if res.is_in_queue() else "",
                    progress    = progress,
                    icon        = icon,
                    pdf_class   = pdf_class,
                    ret         = retInfo)

            else:
                newNote = NOTE_TMPL.format(
                    grid_class  = gridclass,
                    counter     = counter + 1 - meta_card_counter,
                    nid         = nid,
                    creation    = "&nbsp;&#128336; " + timeDiffString,
                    edited      = edited,
                    mouseup     = "getSelectionText()",
                    text        = text,
                    tags        = utility.tags.build_tag_string(res.tags, self.gridView),
                    ret         = retInfo)

            html_parts.append(newNote)
            tags = self._addToTags(tags, res.tags)
            # only the first 20 notes of the page contribute to the text passed to build_info_table
            if idx < 20:
                allText = f"{allText} {res.text[:5000]}"
                if res.note_type == "user":
                    allText = f"{allText} {res.title}"

        tags.sort()
        # the html is embedded in a js template literal, so backticks and $ have to be masked
        html    = "".join(html_parts).replace("`", "&#96;").replace("$", "&#36;")
        pageMax = math.ceil(len(notes) / 50.0)

        index   = get_index()
        if index is not None and index.lastResDict is not None:
            index.lastResDict["time-html"]                    = int((time.time() - start) * 1000)
            index.lastResDict["time-html-highlighting"]       = int(highlight_total * 1000)
            index.lastResDict["time-html-build-user-note"]    = int(build_user_note_total * 1000)

        if stamp is None and self.last_took is not None:
            took = self.last_took
            stamp = -1
        elif stamp is not None:
            took = utility.misc.get_milisec_stamp() - stamp
            self.last_took = took
        else:
            took = "?"
        timing      = "true" if timing_info else "false"
        rerender    = "true" if is_rerender else "false"

        if not self.hideSidebar:
            infoMap = {
                "Took" :  "<b>%s</b> ms %s" % (took, "&nbsp;<b style='cursor: pointer' onclick='pycmd(`siac-last-timing`)'><i class='fa fa-info-circle'></i></b>" if timing_info else ""),
                "Found" :  "<b>%s</b> notes" % (len(notes) if len(notes) > 0 else "<span style='color: red;'>0</span>")
            }
            info = self.build_info_table(infoMap, tags, allText)
            cmd = "setSearchResults(`%s`, `%s`, %s, page=%s, pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s, isRerender=%s);" % (html, info[0].replace("`", "&#96;"), json.dumps(info[1]), page, pageMax, len(notes), len(self.previous_calls), stamp, timing, rerender)
        else:
            cmd = "setSearchResults(`%s`, ``, null, page=%s , pageMax=%s, total=%s, cacheSize=%s, stamp=%s, printTiming=%s, isRerender=%s);" % (html, page, pageMax, len(notes), len(self.previous_calls), stamp, timing, rerender)

        self._js(cmd, editor)

        if len(remaining_to_highlight) > 0:
            cmd = "".join("document.getElementById('siac-inner-card-%s').innerHTML = `%s`;" % (nid, utility.text.mark_highlights(text, query_set)) for nid, text in remaining_to_highlight.items())
            self._js(cmd, editor)

        if len(check_for_suspended) > 0:
            susp = get_suspended(check_for_suspended)
            if len(susp) > 0:
                cmd = "".join(f"$('#siac-susp-dsp-{nid}').html(`<span id='siac-susp-lbl-{nid}' onclick='pycmd(\"siac-unsuspend-modal {nid}\")' class='siac-susp-lbl'>&nbsp;SUSPENDED&nbsp;</span>`);" for nid in susp)
                self._js(cmd, editor)

        if len(pdfs) > 0:
            pdf_info_list = get_pdf_info(pdfs)

            if pdf_info_list is not None and len(pdf_info_list) > 0:
                js_parts = []
                for i in pdf_info_list:

                    # i[1] / i[2] are displayed as "<i[1]> / <i[2]>"; a pdf without pages gets an empty bar
                    perc        = int(i[1] * 10.0 / i[2]) if i[2] else 0
                    filled      = min(max(perc, 0), 10)
                    prog_bar    = "<div class='siac-prog-sq-filled'></div>" * filled + "<div class='siac-prog-sq'></div>" * (10 - filled)
                    js_parts.append("document.querySelector('#ptmp-%s').innerHTML = `%s <span>%s / %s</span>`;" % (i[0], prog_bar, i[1], i[2]))

                    extract             = ""
                    ext_start           = i[3]
                    ext_end             = i[4]
                    if ext_end and ext_start == ext_end:
                        extract         = f"<span class='siac-extract-mark'> [{ext_start}]</span>"
                    elif ext_start:
                        extract         = f"<span class='siac-extract-mark'> [{ext_start} - {ext_end}]</span>"
                    if extract != "":
                        js_parts.append("document.querySelector('#siac-ex-tmp-%s').innerHTML = `%s`;" % (i[0], extract))

                self._js("".join(js_parts), editor)

        return (highlight_total * 1000, build_user_note_total)
def search_results(db_list: List[IndexNote], query_set: List[str]) -> HTML:
    """
    Renders a list of notes with the simple note templates and returns the html. Used e.g. in the pdf viewer.

    Notes whose note_type is "index" use NOTE_TMPL_SIMPLE, all others NOTE_TMPL_SIAC_SIMPLE.
    Terms in query_set are highlighted if the "highlighting" config value is set and query_set is not None.
    """
    note_ids = [note.id for note in db_list]
    show_ret = conf_or_def("showRetentionScores", True)
    hidden_by_model = conf_or_def("fieldsToHideInResults", {})
    hide_clozes = conf_or_def("results.hide_cloze_brackets", False)
    remove_divs = conf_or_def("removeDivsFromOutput", False)
    if show_ret:
        retsByNid = getRetentions(note_ids)
    highlighting = conf_or_def("highlighting", True)

    rendered = []
    for position, note in enumerate(db_list, start=1):

        # retention label, empty if there is no score for the note
        score = None
        if show_ret and int(note.id) in retsByNid:
            score = retsByNid[int(note.id)]
        if score is None:
            ret_html = ""
        else:
            border = "border-color: %s;" % (utility.misc._retToColor(score))
            ret_html = "<div class='retMark' style='%s'>PR: %s</div> " % (
                border, int(score))

        content = note.get_content()

        # drop the fields that are configured to be hidden for the note's model
        model_key = str(note.mid)
        if model_key in hidden_by_model:
            hidden = hidden_by_model[model_key]
            visible = []
            for field_ix, field in enumerate(content.split("\u001f")):
                if field_ix not in hidden:
                    visible.append(field)
            content = "\u001f".join(visible)

        # <div> removal and cloze bracket removal only apply to notes that are not of type "user"
        not_user = note.note_type != "user"
        if remove_divs and not_user:
            content = utility.text.remove_divs(content)
        if hide_clozes and not_user:
            content = utility.text.hide_cloze_brackets(content)

        if highlighting and query_set is not None:
            content = utility.text.mark_highlights(content, query_set)

        # mask characters that would break the js the html is embedded in
        content = utility.text.clean_field_separators(content)
        content = content.replace("\\", "\\\\")
        content = content.replace("`", "\\`")
        content = content.replace("$", "&#36;")

        content = utility.text.try_hide_image_occlusion(content)
        # try to put fields that consist of a single image in their own line
        content = utility.text.newline_before_images(content)

        if note.note_type == "index":
            template = NOTE_TMPL_SIMPLE
        else:
            template = NOTE_TMPL_SIAC_SIMPLE

        tag_html = utility.tags.build_tag_string(note.tags,
                                                 False,
                                                 False,
                                                 maxLength=15,
                                                 maxCount=2)
        rendered.append(
            template.format(counter=position,
                            nid=note.id,
                            edited="",
                            mouseup="",
                            text=content,
                            ret=ret_html,
                            tags=tag_html,
                            creation=""))
    return "".join(rendered)
示例#16
0
    def searchProc(self, text, decks, only_user_notes, print_mode):
        """
        Runs a full text search for the given text on the search-data.db in self.dir.

        Args:
        text - raw query text, it is passed through self.clean and expanded by self.synonyms
        decks - list of deck ids (as strings) to restrict the results to, "-1" in the list means all decks.
                Rows whose did is "-1" are never filtered out by deck. The list is not modified.
        only_user_notes - if True, only rows with mid = '-1' are queried
        print_mode - "default" or "pdf", only decides what happens if the cleaned query is empty

        Returns:
        - None if the query is empty after cleaning (in "default" mode with the hideSidebar config value set,
          a message string is returned instead)
        - {"results": []} if the query is too small or contains no usable token
        - else a dict with timing info, "query", "decks", "highlighting" and "results", a list of tuples
          (source, tags, did, nid, score, mid, refs) with at most self.limit entries; notes in self.pinned are left out.
          The dict is also stored in self.lastResDict.
        """
        resDict = {}
        start = time.time()
        orig = text
        text = self.clean(text)
        resDict["time-stopwords"] = int((time.time() - start) * 1000)
        if self.logging:
            log("\nFTS index - Received query: " + text)
            log("Decks (arg): " + str(decks))
            log("Self.pinned: " + str(self.pinned))
            log("Self.limit: " + str(self.limit))
        self.lastSearch = (text, decks, "default")

        if len(text) == 0:
            if print_mode == "default":
                self.output.editor.web.eval(
                    "setSearchResults(``, 'Query was empty after cleaning.<br/><br/><b>Query:</b> <i>%s</i>')"
                    % utility.text.trim_if_longer_than(orig, 100).replace(
                        "\u001f", ""))
                if mw.addonManager.getConfig(__name__)["hideSidebar"]:
                    return "Found 0 notes. Query was empty after cleaning."
                return None
            elif print_mode == "pdf":
                return None

        start = time.time()
        text = utility.text.expand_by_synonyms(text, self.synonyms)
        resDict["time-synonyms"] = int((time.time() - start) * 1000)
        resDict["query"] = text
        if utility.text.text_too_small(text):
            if self.logging:
                log("Returning - Text was < 2 chars: " + text)
            return {"results": []}

        tokens = text.split(" ")
        if len(tokens) > 10:
            # long queries are deduplicated, dict.fromkeys keeps the token order stable
            tokens = list(dict.fromkeys(tokens))
        # a literal "OR" inside a token would be read as the boolean operator
        terms = [
            s.strip().replace("OR", "or") for s in tokens
            if not utility.text.text_too_small(s)
        ]
        is_fts5 = self.type == "SQLite FTS5"
        if is_fts5:
            query = " OR ".join("tags:" + t for t in terms)
            query += " OR " + " OR ".join("text:" + t for t in terms)
        else:
            query = " OR ".join(terms)
        if len(query) == 0 or query == " OR ":
            if self.logging:
                log("Returning. Query was: " + query)
            return {"results": []}

        allDecks = "-1" in decks
        # work on a copy: appending to the caller's list would make every later search
        # with the same list (e.g. the one stored in self.lastSearch) match all decks
        deck_filter = list(decks)
        deck_filter.append("-1")
        resDict["decks"] = deck_filter
        rList = []
        user_note_filter = "AND mid='-1'" if only_user_notes else ""

        # the match expression is bound as a parameter, so quotes in the query cannot break the statement
        if is_fts5:
            dbStr = "select nid, text, tags, did, source, bm25(notes) as score, mid, refs from notes where notes match ? %s order by score" % user_note_filter
        else:
            #bm25 results in really slow queries for some reason, so we use the simpler ranking for fts4
            dbStr = "select nid, text, tags, did, source, simple_rank(matchinfo(notes)) as score, mid, refs from notes where text match ? %s order by score desc" % user_note_filter

        conn = sqlite3.connect(self.dir + "search-data.db")
        try:
            if not is_fts5:
                conn.create_function("simple_rank", 1, simple_rank)
            try:
                start = time.time()
                res = conn.execute(dbStr, (query, )).fetchall()
                resDict["time-query"] = int((time.time() - start) * 1000)
            except Exception as e:
                # a failing query is treated like an empty result
                if self.logging:
                    log("Executing db query threw exception: " + str(e))
                res = []
            if self.logging:
                log("dbStr was: " + dbStr)
                log("Result length of db query: " + str(len(res)))

            resDict["highlighting"] = self.highlighting
            for r in res:
                if str(r[0]) in self.pinned:
                    continue
                if allDecks or str(r[3]) in deck_filter:
                    rList.append((r[4], r[2], r[3], r[0], r[5], r[6], r[7]))
                    if len(rList) >= self.limit:
                        break
        finally:
            conn.close()

        if self.logging:
            log("Query was: " + query)
            log("Result length (after removing pinned and unselected decks): "
                + str(len(rList)))
        resDict["results"] = rList[:self.limit]
        self.lastResDict = resDict
        return resDict