def recoll_initsearch(q):
    """Open the recoll index (plus any configured extra indexes) and run
    the search described by the query dict *q*.

    Returns the recoll query object.  If execution fails the object is
    returned unexecuted, so callers just see an empty result set.
    """
    config = get_config()
    # Open the main index together with any configured extra indexes.
    if config['extradbs']:
        db = recoll.connect(config['confdir'], extra_dbs=config['extradbs'])
    else:
        db = recoll.connect(config['confdir'])
    db.setAbstractParams(config['maxchars'], config['context'])
    query = db.query()
    query.sortby(q['sort'], q['ascending'])
    try:
        qs = query_to_recoll_string(q)
        query.execute(qs, config['stem'], config['stemlang'])
    except Exception:
        # Deliberate best-effort: a malformed query string yields an
        # empty result instead of an error page.  Narrowed from a bare
        # "except:" so SystemExit/KeyboardInterrupt propagate.
        pass
    return query
def _fetchalldocs(self, confdir):
    """Fetch every document from the index under *confdir* into
    self._rcldocs, applying minim tag aliases when configured.

    self._maxrclcnt, when > 0, caps the number of documents kept.
    """
    start = timer()
    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    # Match-everything query; stemming off because "mime:*" is a field
    # wildcard, not free text.
    rclq.execute("mime:*", stemming=0)
    #rclq.execute('album:a* OR album:b* OR album:c*', stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
    tagaliases = None
    if uprclinit.g_minimconfig:
        tagaliases = uprclinit.g_minimconfig.gettagaliases()
    self._rcldocs = []
    for doc in rclq:
        if tagaliases:
            # Copy aliased tag values orig -> target: unconditionally
            # when rep (replace) is set, else only if target is empty.
            for orig, target, rep in tagaliases:
                val = doc.get(orig)
                #uplog("Rep %s doc[%s]=[%s] doc[%s]=[%s]"%
                #      (rep, orig, val, target, doc.get(target)))
                if val and (rep or not doc.get(target)):
                    setattr(doc, target, val)
        self._rcldocs.append(doc)
        if self._maxrclcnt > 0 and len(self._rcldocs) >= self._maxrclcnt:
            break
        # Yield the CPU: this can be a long loop in a threaded server.
        time.sleep(0)
    end = timer()
    uplog("Retrieved %d docs in %.2f Seconds" % (len(self._rcldocs), end - start))
def recoll_complete(pattern):
    """Yield existing file paths whose names match *pattern*, using the
    recoll desktop search index to quickly locate candidates anywhere
    on the filesystem.
    """
    # Patterns of 1 or 2 characters would match far too many entries.
    if len(pattern) < 3:
        return
    index = recoll.connect()
    clause = recoll.SearchData()
    clause.addclause('filename', pattern + '*')
    rclquery = index.query()
    hits = rclquery.executesd(clause)
    for _ in range(hits):
        result = rclquery.fetchone()
        parsed = urlparse.urlparse(result.get('url'))
        if parsed.scheme != 'file':
            continue
        candidate = urllib.unquote(parsed.path)
        # The index is not guaranteed to be up to date, so confirm the
        # file still exists and really starts with the pattern.
        if not os.path.basename(candidate).startswith(pattern):
            continue
        if os.path.exists(candidate):
            yield candidate
def __init__(self, result_limit, results_ready_cb=None, debug=True):
    """Multiprocess recoll search backend."""
    # Open a session with the local recoll index.
    self._connection = recoll.connect()
    self._debug = debug
    self._result_limit = result_limit
    self._results_ready_cb = results_ready_cb
    self._thread = None
    self.name = 'Recoll Multiprocess'
def recoll_initsearch(config, query, dir, sort, ascending):
    """Open the default recoll index, run *query* restricted to *dir*,
    and return the executed recoll query object, sorted as requested."""
    rcldb = recoll.connect()
    rcldb.setAbstractParams(config['maxchars'], config['context'])
    rclq = rcldb.query()
    rclq.sortby(sort, ascending)
    rclq.execute(build_query_string(query, dir),
                 config['stem'], config['stemlang'])
    return rclq
def recoll_initsearch(q):
    """Open the recoll database(s) relevant to the query dict *q* and
    start the search, returning the recoll query object.

    q['dir'] selects which configured index(es) to search ('<all>' for
    everything); q['sort']/q['ascending'] control result ordering.
    """
    config = get_config()
    confdir = config['confdir']
    dbs = []
    """ The reason for this somewhat elaborate scheme is to keep the set
    size as small as possible by searching only those databases with
    matching topdirs """
    if q['dir'] == '<all>':
        if config['extraconfdirs']:
            dbs.extend(map(get_dbdir, config['extraconfdirs']))
    else:
        # Keep only the configs whose topdir basename prefixes the
        # requested search directory.
        confdirs = []
        for d, conf in config['dirs'].items():
            tdbasename = os.path.basename(d)
            if os.path.commonprefix([tdbasename, q['dir']]) == tdbasename:
                confdirs.append(conf)
        if len(confdirs) == 0:
            # should not happen, using non-existing q['dir']?
            bottle.abort(
                400, 'no matching database for search directory ' + q['dir'])
        elif len(confdirs) == 1:
            # only one config (most common situation)
            confdir = confdirs[0]
        else:
            # more than one config with matching topdir, use 'm all
            confdir = confdirs[0]
            dbs.extend(map(get_dbdir, confdirs[1:]))
    if config['extradbs']:
        dbs.extend(config['extradbs'])
    if dbs:
        db = recoll.connect(confdir, dbs)
    else:
        db = recoll.connect(confdir)
    db.setAbstractParams(config['maxchars'], config['context'])
    query = db.query()
    query.sortby(q['sort'], q['ascending'])
    try:
        qs = query_to_recoll_string(q)
        query.execute(qs, config['stem'], config['stemlang'])
    except Exception as ex:
        # Best-effort: log and return the unexecuted query (empty set).
        msg("Query execute failed: %s" % ex)
        pass
    return query
def recoll_initsearch(q):
    """Connect to the configured recoll index and run the search
    described by the query dict *q*.

    Returns the recoll query object; if execution fails the object is
    returned unexecuted (best-effort: callers see zero results).
    """
    config = get_config()
    db = recoll.connect(config['confdir'])
    db.setAbstractParams(config['maxchars'], config['context'])
    query = db.query()
    query.sortby(q['sort'], q['ascending'])
    try:
        qs = query_to_recoll_string(q)
        query.execute(qs, config['stem'], config['stemlang'])
    except Exception:
        # Narrowed from a bare "except:" so SystemExit and
        # KeyboardInterrupt are no longer swallowed; query errors are
        # still deliberately ignored (empty result set).
        pass
    return query
def Query(self, queryStr):
    """Run *queryStr* against the default recoll index and return up to
    500 results as plain dicts, with the file:// prefix stripped from
    each document's url.
    """
    db = recoll.connect()
    queryObj = db.query()
    queryObj.execute(queryStr)  # result count was previously stored but unused
    results = queryObj.fetchmany(500)
    rtn = []
    for doc in results:
        # Strip the scheme so callers get a plain filesystem path.
        doc.url = str(doc.url).replace('file://', '')
        rtn.append({key: doc[key] for key in doc.keys()})
    return rtn
def index_mboxdir(dir):
    """Index every extension-less regular file under *dir* as an mbox,
    then purge index entries for documents that no longer exist."""
    db = recoll.connect(confdir=rclconf, writable=1)
    for path in glob.glob(dir + "/*"):
        # skip .log etc.: our mboxes have no extensions, and anything
        # that is not a regular file is skipped too.
        if '.' in os.path.basename(path) or not os.path.isfile(path):
            continue
        print("Processing %s" % path)
        mbox_indexer(db, path).index()
    db.purge()
def fetchalldocs(confdir):
    """Return (up to 1000 of) all documents in the index under
    *confdir*, fetched batch by batch with a match-everything query."""
    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    # stemming off: "mime:*" is a field wildcard, not free text.
    rclq.execute("mime:*", stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
    limit = 1000
    collected = []
    while True:
        batch = rclq.fetchmany()
        collected.extend(batch)
        # Stop once the cap is reached, or on a short batch (end of
        # the result set).
        if (limit > 0 and len(collected) >= limit) or \
           len(batch) != rclq.arraysize:
            break
    uplog("Retrieved %d docs" % (len(collected), ))
    return collected
def __init__(self, cfgDir, extra_dbs):
    """Qt list model over recoll search results.

    cfgDir: recoll configuration directory ("" for the default).
    extra_dbs: list of additional index directories to search.
    """
    QtCore.QAbstractListModel.__init__(self)
    self.totres = -1
    self.query = None
    self.qtext = ""
    self.searchResults = []
    self.pagelen = 6
    # Document attributes displayed for each result row.
    self.attrs = ("filename", "title", "mtime", "url", "ipath")
    # TODO read from config and prepare db
    # Snippet (abstract) parameters.
    maxchars = 300
    contextwords = 6
    # Removed the no-op "extra_dbs = extra_dbs" self-assignment and the
    # redundant confdir alias: the parameters are used directly.
    self.db = recoll.connect(confdir=cfgDir, extra_dbs=extra_dbs)
    self.db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
def search(foldersobj, rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
    """Run a UPnP search against the recoll index and return content
    directory entries.

    The UPnP criteria *upnps* are translated to recoll query language;
    when *objid* maps to a real directory the search is restricted to
    it with a dir: clause.  Returns the sorted entry list, or [] on
    query error or empty result.
    """
    rcls = _upnpsearchtorecoll(upnps)
    filterdir = foldersobj.dirpath(objid)
    if filterdir and filterdir != "/":
        rcls += " dir:\"" + filterdir + "\""
    uplog("Search: recoll search: <%s>" % rcls)
    rcldb = recoll.connect(confdir=rclconfdir)
    try:
        rclq = rcldb.query()
        rclq.execute(rcls)
    except Exception as e:
        uplog("Search: recoll query raised: %s" % e)
        return []
    uplog("Estimated query results: %d" % (rclq.rowcount))
    if rclq.rowcount == 0:
        return []
    entries = []
    # maxcnt == 0 means no limit on the number of returned entries.
    maxcnt = 0
    while True:
        docs = rclq.fetchmany()
        for doc in docs:
            arturi = uprclutils.docarturi(doc, httphp, pathprefix)
            if arturi:
                # The uri is quoted, so it's ascii and we can just store
                # it as a doc attribute
                doc.albumarturi = arturi
            id = foldersobj.objidfordoc(doc)
            e = uprclutils.rcldoctoentry(id, objid, httphp, pathprefix, doc)
            if e:
                entries.append(e)
        # A short batch means the result set is exhausted.
        if (maxcnt > 0 and len(entries) >= maxcnt) or \
           len(docs) != rclq.arraysize:
            break
    uplog("Search retrieved %d docs" % (len(entries),))
    entries.sort(key=uprclutils.cmpentries)
    return entries
def _fetchalldocs(confdir):
    """Fetch all documents from the index under *confdir* and return
    them as a list.

    The module-level _maxrclcnt, when > 0, caps the number fetched.
    """
    start = timer()
    allthedocs = []
    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    # Match-everything query; stemming off for the field wildcard.
    rclq.execute("mime:*", stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
    totcnt = 0
    while True:
        docs = rclq.fetchmany()
        for doc in docs:
            allthedocs.append(doc)
            totcnt += 1
        # Stop on reaching the cap or on a short batch (end of results).
        if (_maxrclcnt > 0 and totcnt >= _maxrclcnt) or \
           len(docs) != rclq.arraysize:
            break
        # Yield the CPU during this potentially long loop.
        time.sleep(0)
    end = timer()
    uplog("Retrieved %d docs in %.2f Seconds" % (totcnt, end - start))
    return allthedocs
def main(args):
    """Qt entry point: parse -c/-i options, connect to the index, show
    the main window and optionally pre-run the query given on the
    command line.  (Python 2 / PyQt code.)
    """
    app = QtGui.QApplication(args)
    confdir = ""
    extra_dbs = []
    # Snippet (abstract) parameters.
    maxchars = 300
    contextwords = 6
    # Options: [-c confdir] [-i extra_db [-i extra_db] ...]
    options, args = getopt(args[1:], "c:i:")
    for opt, val in options:
        if opt == "-c":
            confdir = val
        elif opt == "-i":
            extra_dbs.append(val)
        else:
            print >> sys.stderr, "Bad opt: ", opt
            Usage()
    # Any remaining arguments make up the initial query (words joined
    # with spaces, trailing space kept as before).
    q = " ".join(args) + " " if args else None
    db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
    db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
    topwindow = RclGui_Main(db)
    topwindow.show()
    if q is not None:
        topwindow.ui.searchEntry.setText(q)
        topwindow.startQuery()
    sys.exit(app.exec_())
def main(args):
    """Parse command line options, open the recoll database and start
    the Qt GUI, running the query from the remaining arguments if any.
    (Python 2 / PyQt code.)
    """
    app = QtGui.QApplication(args)
    confdir = ""
    extra_dbs = []
    # Snippet (abstract) parameters.
    maxchars = 300
    contextwords = 6
    # Process options: [-c confdir] [-i extra_db [-i extra_db] ...]
    opts, args = getopt(args[1:], "c:i:")
    for flag, value in opts:
        if flag == "-c":
            confdir = value
        elif flag == "-i":
            extra_dbs.append(value)
        else:
            print >> sys.stderr, "Bad opt: ", flag
            Usage()
    # The query should be in the remaining arg(s).
    q = None
    if args:
        q = ""
        for word in args:
            q += word + " "
    db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
    db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
    topwindow = RclGui_Main(db)
    topwindow.show()
    if q is not None:
        topwindow.ui.searchEntry.setText(q)
        topwindow.startQuery()
    sys.exit(app.exec_())
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
    """Run a UPnP search against the recoll index under *rclconfdir*
    and return the matching directory entries, sorted.

    Returns [] on query error or empty result.  (Python 2 code: note
    the sorted(..., cmp=...) at the end.)
    """
    rcls = upnpsearchtorecoll(upnps)
    filterdir = uprclfolders.dirpath(objid)
    if filterdir and filterdir != "/":
        rcls += " dir:\"" + filterdir + "\""
    uplog("Search: recoll search: <%s>" % rcls)
    rcldb = recoll.connect(confdir=rclconfdir)
    try:
        rclq = rcldb.query()
        rclq.execute(rcls)
    except Exception as e:
        uplog("Search: recoll query raised: %s" % e)
        return []
    uplog("Estimated query results: %d" % (rclq.rowcount))
    if rclq.rowcount == 0:
        return []
    entries = []
    # maxcnt == 0 means no limit on the number of returned entries.
    maxcnt = 0
    while True:
        docs = rclq.fetchmany()
        for doc in docs:
            # Placeholder object id; see idprefix for the real scheme.
            id = idprefix + '$' + 'seeyoulater'
            e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
            if e:
                entries.append(e)
        # A short batch means the result set is exhausted.
        if (maxcnt > 0 and len(entries) >= maxcnt) or \
           len(docs) != rclq.arraysize:
            break
    uplog("Search retrieved %d docs" % (len(entries), ))
    return sorted(entries, cmp=cmpentries)
def __init__(self, result_limit, results_ready_cb=None, debug=True):
    """Non-threaded recoll search backend."""
    _RecollCommon.__init__(self, recoll.connect(), result_limit, debug)
    self.name = 'Recoll No Thread'
    self._results_ready_cb = results_ready_cb
    self._tag = None
# Command-line driver: parse options, open the index, run the query.
# Determine the user's locale (charset presumably used when printing
# results -- TODO confirm against doquery()).
language, localecharset = locale.getdefaultlocale()
confdir=""
extra_dbs = []
# Snippet params
maxchars = 120
contextwords = 4
# Process options: [-c confdir] [-i extra_db [-i extra_db] ...]
options, args = getopt(sys.argv[1:], "c:i:")
for opt,val in options:
    if opt == "-c":
        confdir = val
    elif opt == "-i":
        extra_dbs.append(val)
    else:
        print("Bad opt: %s"%(opt,))
        Usage()
# The query should be in the remaining arg(s)
if len(args) == 0:
    print("No query found in command line")
    Usage()
# Join the remaining words into a single query string (trailing space).
q = ''
for word in args:
    q += word + ' '
print("QUERY: [%s]"%(q,))
db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
doquery(db, q)
def __init__(self, result_limit, results_ready_cb=None, debug=True):
    """Recoll search backend delivering results via a timer."""
    _RecollCommon.__init__(self, recoll.connect(), result_limit, debug)
    self.name = 'Recoll'
    self._results_ready_cb = results_ready_cb
    self._query_timer = None
nres = 10 for i in range(nres): doc = query.fetchone() print query.next if type(query.next) == int else query.rownumber for k in ("url", "mtime", "title", "author", "abstract"): if getattr(doc, k): print k, ":", getattr(doc, k).encode('utf-8') else: print k, ": None" print # End dotest # addclause(type='and'|'or'|'filename'|'phrase'|'near'|'path'|'sub' # qstring=string, slack=int, field=string, stemming=1|0, # subSearch=SearchData, exclude=0|1, anchorstart=0|1, anchorend=0|1 # casesens=0|1, diacsens=0|1) #sd.addclause("and", "dockes", field="author") #sd.addclause("phrase", "jean francois", 1) db = recoll.connect(confdir="/home/dockes/.recoll-prod") sd = recoll.SearchData(stemlang="english") sd.addclause('filename', "recollqsd*") dotest(db, sd) sys.exit(0)
from recoll import rclextract if sys.version_info[0] >= 3: ISP3 = True else: ISP3 = False def utf8string(s): if ISP3: return s else: return s.encode('utf8') db = recoll.connect() query = db.query() # This normally has only one result, a well-known html file nres = query.execute("HtmlAttachment_uniqueTerm", stemming=0) print("Result count: %d %d" % (nres, query.rowcount)) doc = query.fetchone() xtrac = rclextract.Extractor(doc) doc = xtrac.textextract(doc.ipath) print("Text length: %d" % len(doc.text)) refdigest = 'bfbb63f7a245c31767585b45014dbd07' # This normally has 2 results, one of which is a pdf attachment. nres = query.execute("population_size_cultural_transmission", stemming=0) for doc in query:
# text text = u"" text += u"From: " + header_value(msg, "From") + u"\n" text += u"To: " + header_value(msg, "To") + u"\n" text += u"Subject: " + header_value(msg, "Subject") + u"\n" #text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n" text += u"\n" for part in msg.walk(): if part.is_multipart(): pass else: ct = part.get_content_type() if ct.lower() == "text/plain": charset = part.get_content_charset("iso-8859-1") #print "charset: ", charset #print "text: ", part.get_payload(None, True) text += unicode(part.get_payload(None, True), charset) doc.text = text # dbytes doc.dbytes = str(len(text)) # sig doc.sig = self.sig() udi = self.udi(self.msgnum) db.addOrUpdate(udi, doc) db = recoll.connect(confdir=rclconf, writable=1) mbidx = mbox_indexer(mbfile) mbidx.index(db)
print "Query: ", query.getxquery().encode('utf-8') if nres > 10: nres = 10 for i in range(nres): doc = query.fetchone() print query.next if type(query.next) == int else query.rownumber for k in ("url", "mtime", "title", "author", "abstract"): if getattr(doc, k): print k, ":", getattr(doc, k).encode('utf-8') else: print k, ": None" print # End dotest # addclause(type='and'|'or'|'filename'|'phrase'|'near'|'path'|'sub' # qstring=string, slack=int, field=string, stemming=1|0, # subSearch=SearchData, exclude=0|1, anchorstart=0|1, anchorend=0|1 # casesens=0|1, diacsens=0|1) #sd.addclause("and", "dockes", field="author") #sd.addclause("phrase", "jean francois", 1) db = recoll.connect(confdir="/home/dockes/.recoll-prod") sd = recoll.SearchData(stemlang="english") sd.addclause('filename', "recollqsd*") dotest(db, sd) sys.exit(0)
def on_event(self, event, extension):
    """Dispatch a ulauncher keyword query to one of the configured
    search backends (calibre/sqlite, recoll, docfetcher, tracker3,
    locate) and return the result list to render.

    NOTE(review): several branches have latent issues flagged inline;
    behavior left unchanged in this documentation pass.
    """
    keyword = event.get_keyword()
    preferences = extension.preferences
    query_words = event.get_argument()
    if query_words == None:
        query_words = ""
    # When the calibre library path preference is 'default', parse the
    # real path out of calibre's own config file.
    if preferences["cb_lib_path"] == 'default':
        try:
            with open(home + '/.config/calibre/global.py') as f:
                text = f.readlines()
                for i in text:
                    if 'library_path' in i:
                        calibre_lib_path = i.strip()[17:-1]
        except FileNotFoundError:
            pass
    if keyword == preferences["cb_kw"]:
        # --- calibre: query metadata.db directly with sqlite ---
        import sqlite3
        if not preferences["cb_lib_path"] == 'default':
            calibre_lib_path = preferences["cb_lib_path"]
            if preferences["cb_lib_path"][-1] == '/':
                conn = sqlite3.connect(preferences["cb_lib_path"]+"metadata.db")
            else:
                conn = sqlite3.connect(preferences["cb_lib_path"]+"/metadata.db")
        else:
            print(calibre_lib_path+"/metadata.db")
            conn = sqlite3.connect(calibre_lib_path+"/metadata.db")
        c = conn.cursor()
        queries = query_words.split()
        # NOTE(review): SQL is built with str.format on user input;
        # should use parameterized queries ("?" placeholders) instead.
        if len(queries) == 1:
            results = c.execute('select title, author_sort, path from books where (title like "%{}%" or author_sort like "%{}%") limit 10'.format(queries[0], queries[0]))
        elif len(queries) == 2:
            results = c.execute('select title, author_sort, path from books where (title like "%{}%" or author_sort like "%{}%") and id in (select id from books where title like "%{}%" or author_sort like "%{}%")'.format(queries[1], queries[1], queries[0], queries[0]))
        items = []
        for i in results:
            # NOTE(review): trailing comma makes this a 1-tuple, not a
            # string; it only works out when a .jpg overwrites it below.
            cover ='images/gnome.png',
            pad = calibre_lib_path + '/{}'.format(i[2])
            for f in os.listdir(pad):
                if f.split('.')[-1] in ['pdf', 'djvu', 'epub']:
                    filepath = os.path.join(pad, f)
                    print('FILE =', filepath)
                if f.endswith(".jpg"):
                    cover = os.path.join(pad, f)
                    print("cover = ", cover)
            # NOTE(review): filepath may be unbound if the book folder
            # contains no pdf/djvu/epub file.
            data = '%s' %filepath
            items.append(ExtensionResultItem(icon= '%s' %cover, name='%s' %i[0], description="%s" %i[1], on_enter=ExtensionCustomAction(data, keep_app_open=True)))
    elif keyword == preferences["rc_kw"]:
        # --- recoll: full-text search via the recoll python module ---
        from recoll import recoll
        db = recoll.connect()
        query = db.query()
        query_words_list = query_words.split()
        # A bare 'g' word acts as a separator: words before it are the
        # query, the last word filters result filenames.
        if not 'g' in query_words_list[:-1]:
            res = query.execute(query_words)
            if res < 200:
                result_list = query.fetchmany(res)
                results = [[doc.filename, query.makedocabstract(doc)[:80], doc.url] for doc in result_list[:15]]
            else:
                result_list = query.fetchmany(200)
                results = [[doc.filename, query.makedocabstract(doc)[:80], doc.url] for doc in result_list[:15]]
        else:
            # NOTE(review): `res` is never assigned in this branch --
            # the execute() return value should be captured as above,
            # otherwise the next line raises NameError.
            query.execute(' '.join(query_words_list[:query_words_list.index('g')]))
            if res < 200:
                result_list = query.fetchmany(res)
                results = [[doc.filename, query.makedocabstract(doc)[:80], doc.url] for doc in result_list if query_words_list[-1].lower() in doc.filename.lower()]
            else:
                result_list = query.fetchmany(200)
                results = [[doc.filename, query.makedocabstract(doc)[:80], doc.url] for doc in result_list if query_words_list[-1].lower() in doc.filename.lower()]
        #results = sorted(output, key=lambda entry: entry[2])[::-1]
        items = []
        for i in results:
            data = '%s' %i[2]
            items.append(ExtensionResultItem(icon='images/recoll.png', name='%s' %i[0], description="%s" %i[1], on_enter=ExtensionCustomAction(data, keep_app_open=True)))
    elif keyword == preferences["df_kw"]:
        # --- docfetcher: delegate to its search() helper ---
        from search import search
        out = search(query_words,28834)
        output = [[doc.getFilename(),doc.getPathStr(),doc.getLastModifiedStr()] for doc in out]
        results = sorted(output, key=lambda entry: entry[2])[::-1]
        items = []
        for i in results:
            data = '%s' %i[1]
            items.append(ExtensionResultItem(icon='images/docfetcher.png', name='%s' %i[0], description="%s" %i[1], on_enter=ExtensionCustomAction(data, keep_app_open=True)))
    else:
        if keyword == preferences["gt_kw"]:
            # --- tracker3 sparql full-text search ---
            query_words_list = query_words.split()
            if preferences["autowildcardsearch"] == 'Yes':
                if " " in query_words:
                    query_words = "*".join(query_words.split(' ')) + "*"
            else:
                if not 'g' in query_words_list[:-1]:
                    query_words = query_words
                else:
                    query_words = ' '.join(query_words_list[:query_words_list.index('g')])
            command = ['tracker3', 'sparql', '-q', "SELECT nfo:fileName(?f) nie:url(?f) WHERE { ?f nie:url ?url FILTER(fn:starts-with(?url, \'file://" + home + "/\')) . ?f fts:match '"+query_words+"' } ORDER BY nfo:fileLastAccessed(?f)"]
            # command = ['tracker3', 'sparql', '-q', "SELECT nfo:fileName(?f) nie:url(?f) WHERE { ?f nie:url ?url FILTER(fn:starts-with(?url, \'file://" + home + "/\')) . ?f nie:plainTextContent ?w FILTER regex(?w, '"+query_words+"', 'i') }"]
            output = subprocess.check_output(command, encoding='UTF-8')
            print('HALLO', output+'\n')
            if not 'g' in query_words_list[:-1]:
                pre_results = [i.split(', ') for i in output.splitlines()][::-1][1:-1][:20]
            else:
                pre_results = [i.split(', ') for i in output.splitlines()[1:-1] if query_words_list[-1].lower() in i][::-1][:20]
            print("RES",pre_results)
            results = [[pre_results[i][0][2:],pre_results[i][1][7:]] for i in range(len(pre_results))]
        elif keyword == preferences["ts_kw"]:
            # --- tracker3 simple search ---
            import re
            out1 = subprocess.check_output(['tracker3','search',query_words], encoding='UTF-8')
            out2 = [i for i in out1.splitlines()]
            # Strip ANSI color escape sequences from tracker output.
            out3 = [re.sub('\x1b[^m]*m', '', i).strip() for i in out2[1:]]
            pre_results = list(chunks(out3,3))[:-1]
            print(pre_results)
            results = [[pre_results[i][1],pre_results[i][0][7:]] for i in range(len(pre_results))]
        elif keyword == preferences["lc_kw"]:
            # --- locate, optionally piped through grep filters ---
            words = query_words.split(' ')
            # first_part = words.index("g")
            if len(words) == 1:
                output = subprocess.check_output(['locate', '-i', '-l', '11', '-r', query_words], encoding='UTF-8')
                pre_results = output.splitlines()
                results = [[os.path.basename(i),i] for i in pre_results]
            elif preferences["autowildcardsearch"] == 'No':
                if len(words) == 3 and words[1] == 'g':
                    loc = subprocess.Popen(['locate', '-i', '-l', '100000', '-r', words[0]], stdout=subprocess.PIPE)
                    #output = subprocess.run(['grep','-i', '-m','11', 'rey'], input=loc.stdout, capture_output=True)
                    output = subprocess.check_output(['grep','-i', '-m','11', words[2]], stdin=loc.stdout, encoding='UTF-8')
                    pre_results = output.splitlines()
                    results = [[os.path.basename(i),i] for i in pre_results]
                elif len(words) == 5 and words[1] == 'g' and words [3] == 'g':
                    loc = subprocess.Popen(['locate', '-i', '-l', '100000', '-r', words[0]], stdout=subprocess.PIPE)
                    #output = subprocess.run(['grep','-i', '-m','11', 'rey'], input=loc.stdout, capture_output=True)
                    grep1 = subprocess.Popen(['grep','-i', words[2]], stdin=loc.stdout, stdout=subprocess.PIPE)
                    output = subprocess.check_output(['grep', '-i', '-m','11', words[4]], stdin=grep1.stdout, encoding='UTF-8')
                    pre_results = output.splitlines()
                    results = [[os.path.basename(i),i] for i in pre_results]
            # Do auto wildcard search if enabled in preferences
            else:
                output = subprocess.check_output(['locate', '-i', '-l', '11', '-r', ".*" + ".*".join(words) + ".*"], encoding='UTF-8')
                pre_results = output.splitlines()
                results = [[os.path.basename(i),i] for i in pre_results]
        # NOTE(review): `results` is unbound here if none of the keyword
        # branches above matched/assigned it.
        items = []
        for i in results:
            data = '%s' %i[1]
            print(data)
            items.append(ExtensionResultItem(icon='images/gnome.png', name='%s' %i[0], description="%s" %i[1], on_enter=ExtensionCustomAction(data, keep_app_open=True)))
    return RenderResultListAction(items)